1 : /******************************************************************************
2 : * $Id: cpl_vsil_curl.cpp 23506 2011-12-10 13:43:59Z rouault $
3 : *
4 : * Project: CPL - Common Portability Library
5 : * Purpose: Implement VSI large file api for HTTP/FTP files
6 : * Author: Even Rouault, even.rouault at mines-paris.org
7 : *
8 : ******************************************************************************
9 : * Copyright (c) 2008, Even Rouault
10 : *
11 : * Permission is hereby granted, free of charge, to any person obtaining a
12 : * copy of this software and associated documentation files (the "Software"),
13 : * to deal in the Software without restriction, including without limitation
14 : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15 : * and/or sell copies of the Software, and to permit persons to whom the
16 : * Software is furnished to do so, subject to the following conditions:
17 : *
18 : * The above copyright notice and this permission notice shall be included
19 : * in all copies or substantial portions of the Software.
20 : *
21 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
22 : * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
24 : * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25 : * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26 : * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27 : * DEALINGS IN THE SOFTWARE.
28 : ****************************************************************************/
29 :
30 : #include "cpl_vsi_virtual.h"
31 : #include "cpl_string.h"
32 : #include "cpl_multiproc.h"
33 : #include "cpl_hash_set.h"
34 : #include "cpl_time.h"
35 :
36 : CPL_CVSID("$Id: cpl_vsil_curl.cpp 23506 2011-12-10 13:43:59Z rouault $");
37 :
38 : #ifndef HAVE_CURL
39 :
/* Stub used when the library is built without libcurl (HAVE_CURL is not */
/* defined): installing the /vsicurl/ handler is then a no-op.           */
void VSIInstallCurlFileHandler()
{
    /* not supported */
}
44 :
45 : #else
46 :
47 : #include <curl/curl.h>
48 :
49 : #include <map>
50 :
/* Non-zero enables the CPLDebug("VSICURL", ...) traces guarded by */
/* "if (ENABLE_DEBUG)" in this file.                                */
#define ENABLE_DEBUG 1

/* Upper bound on the number of cached regions. */
/* NOTE(review): the code enforcing this limit is not in this part of the file. */
#define N_MAX_REGIONS       1000

/* Size in bytes of one download/cache block; range requests are issued in */
/* multiples of this value.                                                  */
#define DOWNLOAD_CHUNCK_SIZE    16384
56 :
/* Tri-state result of probing whether a remote resource exists. */
enum ExistStatus
{
    EXIST_UNKNOWN = -1,  /* not probed yet */
    EXIST_NO      = 0,   /* probed: resource is absent */
    EXIST_YES     = 1    /* probed: resource is present */
};
63 :
64 : typedef struct
65 : {
66 : ExistStatus eExists;
67 : int bHastComputedFileSize;
68 : vsi_l_offset fileSize;
69 : int bIsDirectory;
70 : time_t mTime;
71 : } CachedFileProp;
72 :
/* Cached directory listing. */
struct CachedDirList
{
    int     bGotFileList;   /* TRUE when a listing could be obtained */
    char**  papszFileList;  /* only file name without path */
};
78 :
79 : typedef struct
80 : {
81 : unsigned long pszURLHash;
82 : vsi_l_offset nFileOffsetStart;
83 : size_t nSize;
84 : char *pData;
85 : } CachedRegion;
86 :
87 :
/* Returns the (relative) file name used for the on-disk region cache. */
static const char* VSICurlGetCacheFileName()
{
    const char* const pszCacheFileName = "gdal_vsicurl_cache.bin";
    return pszCacheFileName;
}
92 :
93 : /************************************************************************/
94 : /* VSICurlFindStringSensitiveExceptEscapeSequences() */
95 : /************************************************************************/
96 :
97 637 : static int VSICurlFindStringSensitiveExceptEscapeSequences( char ** papszList,
98 : const char * pszTarget )
99 :
100 : {
101 : int i;
102 :
103 637 : if( papszList == NULL )
104 0 : return -1;
105 :
106 50086 : for( i = 0; papszList[i] != NULL; i++ )
107 : {
108 50056 : const char* pszIter1 = papszList[i];
109 50056 : const char* pszIter2 = pszTarget;
110 : char ch1, ch2;
111 : /* The comparison is case-sensitive, escape for escaped */
112 : /* sequences where letters of the hexadecimal sequence */
113 : /* can be uppercase or lowercase depending on the quoting algorithm */
114 28437 : while(TRUE)
115 : {
116 78493 : ch1 = *pszIter1;
117 78493 : ch2 = *pszIter2;
118 78493 : if (ch1 == '\0' || ch2 == '\0')
119 622 : break;
120 77871 : if (ch1 == '%' && ch2 == '%' &&
121 0 : pszIter1[1] != '\0' && pszIter1[2] != '\0' &&
122 0 : pszIter2[1] != '\0' && pszIter2[2] != '\0')
123 : {
124 0 : if (!EQUALN(pszIter1+1, pszIter2+1, 2))
125 0 : break;
126 0 : pszIter1 += 2;
127 0 : pszIter2 += 2;
128 : }
129 77871 : if (ch1 != ch2)
130 49434 : break;
131 28437 : pszIter1 ++;
132 28437 : pszIter2 ++;
133 : }
134 50056 : if (ch1 == ch2 && ch1 == '\0')
135 607 : return i;
136 : }
137 :
138 30 : return -1;
139 : }
140 :
141 : /************************************************************************/
142 : /* VSICurlIsFileInList() */
143 : /************************************************************************/
144 :
145 634 : static int VSICurlIsFileInList( char ** papszList, const char * pszTarget )
146 : {
147 634 : int nRet = VSICurlFindStringSensitiveExceptEscapeSequences(papszList, pszTarget);
148 634 : if (nRet >= 0)
149 607 : return nRet;
150 :
151 : /* If we didn't find anything, try to URL-escape the target filename */
152 27 : char* pszEscaped = CPLEscapeString(pszTarget, -1, CPLES_URL);
153 27 : if (strcmp(pszTarget, pszEscaped) != 0)
154 : {
155 3 : nRet = VSICurlFindStringSensitiveExceptEscapeSequences(papszList, pszEscaped);
156 : }
157 27 : CPLFree(pszEscaped);
158 27 : return nRet;
159 : }
160 :
161 : /************************************************************************/
162 : /* VSICurlFilesystemHandler */
163 : /************************************************************************/
164 :
165 : typedef struct
166 : {
167 : CPLString osURL;
168 : CURL *hCurlHandle;
169 2 : } CachedConnection;
170 :
171 :
172 : class VSICurlFilesystemHandler : public VSIFilesystemHandler
173 : {
174 : void *hMutex;
175 :
176 : CachedRegion **papsRegions;
177 : int nRegions;
178 :
179 : std::map<CPLString, CachedFileProp*> cacheFileSize;
180 : std::map<CPLString, CachedDirList*> cacheDirList;
181 :
182 : int bUseCacheDisk;
183 :
184 : /* Per-thread Curl connection cache */
185 : std::map<GIntBig, CachedConnection*> mapConnections;
186 :
187 : char** GetFileList(const char *pszFilename, int* pbGotFileList);
188 :
189 : char** ParseHTMLFileList(const char* pszFilename,
190 : char* pszData,
191 : int* pbGotFileList);
192 : public:
193 : VSICurlFilesystemHandler();
194 : ~VSICurlFilesystemHandler();
195 :
196 : virtual VSIVirtualHandle *Open( const char *pszFilename,
197 : const char *pszAccess);
198 : virtual int Stat( const char *pszFilename, VSIStatBufL *pStatBuf, int nFlags );
199 : virtual int Unlink( const char *pszFilename );
200 : virtual int Rename( const char *oldpath, const char *newpath );
201 : virtual int Mkdir( const char *pszDirname, long nMode );
202 : virtual int Rmdir( const char *pszDirname );
203 : virtual char **ReadDir( const char *pszDirname );
204 : virtual char **ReadDir( const char *pszDirname, int* pbGotFileList );
205 :
206 :
207 : const CachedRegion* GetRegion(const char* pszURL,
208 : vsi_l_offset nFileOffsetStart);
209 :
210 : void AddRegion(const char* pszURL,
211 : vsi_l_offset nFileOffsetStart,
212 : size_t nSize,
213 : const char *pData);
214 :
215 : CachedFileProp* GetCachedFileProp(const char* pszURL);
216 :
217 : void AddRegionToCacheDisk(CachedRegion* psRegion);
218 : const CachedRegion* GetRegionFromCacheDisk(const char* pszURL,
219 : vsi_l_offset nFileOffsetStart);
220 :
221 : CURL *GetCurlHandleFor(CPLString osURL);
222 : };
223 :
224 : /************************************************************************/
225 : /* VSICurlHandle */
226 : /************************************************************************/
227 :
228 : class VSICurlHandle : public VSIVirtualHandle
229 : {
230 : private:
231 : VSICurlFilesystemHandler* poFS;
232 :
233 : char* pszURL;
234 : unsigned long pszURLHash;
235 :
236 : vsi_l_offset curOffset;
237 : vsi_l_offset fileSize;
238 : int bHastComputedFileSize;
239 : ExistStatus eExists;
240 : int bIsDirectory;
241 : time_t mTime;
242 :
243 : vsi_l_offset lastDownloadedOffset;
244 : int nBlocksToDownload;
245 : int bEOF;
246 :
247 : int DownloadRegion(vsi_l_offset startOffset, int nBlocks);
248 :
249 : public:
250 :
251 : VSICurlHandle(VSICurlFilesystemHandler* poFS, const char* pszURL);
252 : ~VSICurlHandle();
253 :
254 : virtual int Seek( vsi_l_offset nOffset, int nWhence );
255 : virtual vsi_l_offset Tell();
256 : virtual size_t Read( void *pBuffer, size_t nSize, size_t nMemb );
257 : virtual int ReadMultiRange( int nRanges, void ** ppData,
258 : const vsi_l_offset* panOffsets, const size_t* panSizes );
259 : virtual size_t Write( const void *pBuffer, size_t nSize, size_t nMemb );
260 : virtual int Eof();
261 : virtual int Flush();
262 : virtual int Close();
263 :
264 642 : int IsKnownFileSize() const { return bHastComputedFileSize; }
265 : vsi_l_offset GetFileSize();
266 : int Exists();
267 642 : int IsDirectory() const { return bIsDirectory; }
268 642 : time_t GetMTime() const { return mTime; }
269 : };
270 :
271 : /************************************************************************/
272 : /* VSICurlHandle() */
273 : /************************************************************************/
274 :
275 690 : VSICurlHandle::VSICurlHandle(VSICurlFilesystemHandler* poFS, const char* pszURL)
276 : {
277 690 : this->poFS = poFS;
278 690 : this->pszURL = CPLStrdup(pszURL);
279 :
280 690 : curOffset = 0;
281 :
282 690 : CachedFileProp* cachedFileProp = poFS->GetCachedFileProp(pszURL);
283 690 : eExists = cachedFileProp->eExists;
284 690 : fileSize = cachedFileProp->fileSize;
285 690 : bHastComputedFileSize = cachedFileProp->bHastComputedFileSize;
286 690 : bIsDirectory = cachedFileProp->bIsDirectory;
287 690 : mTime = cachedFileProp->mTime;
288 :
289 690 : lastDownloadedOffset = -1;
290 690 : nBlocksToDownload = 1;
291 690 : bEOF = FALSE;
292 690 : }
293 :
294 : /************************************************************************/
295 : /* ~VSICurlHandle() */
296 : /************************************************************************/
297 :
298 690 : VSICurlHandle::~VSICurlHandle()
299 : {
300 690 : CPLFree(pszURL);
301 690 : }
302 :
303 :
304 : /************************************************************************/
305 : /* Seek() */
306 : /************************************************************************/
307 :
308 442 : int VSICurlHandle::Seek( vsi_l_offset nOffset, int nWhence )
309 : {
310 442 : if (nWhence == SEEK_SET)
311 : {
312 251 : curOffset = nOffset;
313 : }
314 191 : else if (nWhence == SEEK_CUR)
315 : {
316 152 : curOffset = curOffset + nOffset;
317 : }
318 : else
319 : {
320 39 : curOffset = GetFileSize() + nOffset;
321 : }
322 442 : bEOF = FALSE;
323 442 : return 0;
324 : }
325 :
326 : /************************************************************************/
327 : /* VSICurlSetOptions() */
328 : /************************************************************************/
329 :
330 39 : static void VSICurlSetOptions(CURL* hCurlHandle, const char* pszURL)
331 : {
332 39 : curl_easy_setopt(hCurlHandle, CURLOPT_URL, pszURL);
333 39 : if (CSLTestBoolean(CPLGetConfigOption("CPL_CURL_VERBOSE", "NO")))
334 0 : curl_easy_setopt(hCurlHandle, CURLOPT_VERBOSE, 1);
335 :
336 : /* Set Proxy parameters */
337 39 : const char* pszProxy = CPLGetConfigOption("GDAL_HTTP_PROXY", NULL);
338 39 : if (pszProxy)
339 0 : curl_easy_setopt(hCurlHandle,CURLOPT_PROXY,pszProxy);
340 :
341 39 : const char* pszProxyUserPwd = CPLGetConfigOption("GDAL_HTTP_PROXYUSERPWD", NULL);
342 39 : if (pszProxyUserPwd)
343 0 : curl_easy_setopt(hCurlHandle,CURLOPT_PROXYUSERPWD,pszProxyUserPwd);
344 :
345 : /* Enable following redirections. Requires libcurl 7.10.1 at least */
346 39 : curl_easy_setopt(hCurlHandle, CURLOPT_FOLLOWLOCATION, 1);
347 39 : curl_easy_setopt(hCurlHandle, CURLOPT_MAXREDIRS, 10);
348 :
349 : /* 7.16 */
350 : #if LIBCURL_VERSION_NUM >= 0x071000
351 39 : long option = CURLFTPMETHOD_SINGLECWD;
352 39 : curl_easy_setopt(hCurlHandle, CURLOPT_FTP_FILEMETHOD, option);
353 : #endif
354 :
355 : /* 7.12.3 */
356 : #if LIBCURL_VERSION_NUM > 0x070C03
357 : /* ftp://ftp2.cits.rncan.gc.ca/pub/cantopo/250k_tif/ doesn't like EPSV command */
358 39 : curl_easy_setopt(hCurlHandle, CURLOPT_FTP_USE_EPSV, 0);
359 : #endif
360 :
361 : /* NOSIGNAL should be set to true for timeout to work in multithread
362 : environments on Unix, requires libcurl 7.10 or more recent.
363 : (this force avoiding the use of sgnal handlers) */
364 :
365 : /* 7.10 */
366 : #if LIBCURL_VERSION_NUM >= 0x070A00
367 39 : curl_easy_setopt(hCurlHandle, CURLOPT_NOSIGNAL, 1);
368 : #endif
369 :
370 39 : curl_easy_setopt(hCurlHandle, CURLOPT_NOBODY, 0);
371 39 : curl_easy_setopt(hCurlHandle, CURLOPT_HTTPGET, 1);
372 39 : curl_easy_setopt(hCurlHandle, CURLOPT_HEADER, 0);
373 :
374 : /* 7.16.4 */
375 : #if LIBCURL_VERSION_NUM <= 0x071004
376 : curl_easy_setopt(hCurlHandle, CURLOPT_FTPLISTONLY, 0);
377 : #elif LIBCURL_VERSION_NUM > 0x071004
378 39 : curl_easy_setopt(hCurlHandle, CURLOPT_DIRLISTONLY, 0);
379 : #endif
380 :
381 39 : curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA, NULL);
382 39 : curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION, NULL);
383 39 : }
384 :
385 :
386 : typedef struct
387 : {
388 : char* pBuffer;
389 : size_t nSize;
390 : int bIsHTTP;
391 : int bIsInHeader;
392 : int bMultiRange;
393 : vsi_l_offset nStartOffset;
394 : vsi_l_offset nEndOffset;
395 : int nHTTPCode;
396 : vsi_l_offset nContentLength;
397 : int bFoundContentRange;
398 : int bError;
399 : } WriteFuncStruct;
400 :
401 : /************************************************************************/
402 : /* VSICURLInitWriteFuncStruct() */
403 : /************************************************************************/
404 :
405 57 : static void VSICURLInitWriteFuncStruct(WriteFuncStruct* psStruct)
406 : {
407 57 : psStruct->pBuffer = NULL;
408 57 : psStruct->nSize = 0;
409 57 : psStruct->bIsHTTP = FALSE;
410 57 : psStruct->bIsInHeader = TRUE;
411 57 : psStruct->bMultiRange = FALSE;
412 57 : psStruct->nStartOffset = 0;
413 57 : psStruct->nEndOffset = 0;
414 57 : psStruct->nHTTPCode = 0;
415 57 : psStruct->nContentLength = 0;
416 57 : psStruct->bFoundContentRange = FALSE;
417 57 : psStruct->bError = FALSE;
418 57 : }
419 :
420 : /************************************************************************/
421 : /* VSICurlHandleWriteFunc() */
422 : /************************************************************************/
423 :
424 457 : static int VSICurlHandleWriteFunc(void *buffer, size_t count, size_t nmemb, void *req)
425 : {
426 457 : WriteFuncStruct* psStruct = (WriteFuncStruct*) req;
427 457 : size_t nSize = count * nmemb;
428 :
429 : char* pNewBuffer = (char*) VSIRealloc(psStruct->pBuffer,
430 457 : psStruct->nSize + nSize + 1);
431 457 : if (pNewBuffer)
432 : {
433 457 : psStruct->pBuffer = pNewBuffer;
434 457 : memcpy(psStruct->pBuffer + psStruct->nSize, buffer, nSize);
435 457 : psStruct->pBuffer[psStruct->nSize + nSize] = '\0';
436 457 : if (psStruct->bIsHTTP && psStruct->bIsInHeader)
437 : {
438 145 : char* pszLine = psStruct->pBuffer + psStruct->nSize;
439 160 : if (EQUALN(pszLine, "HTTP/1.0 ", 9) ||
440 : EQUALN(pszLine, "HTTP/1.1 ", 9))
441 15 : psStruct->nHTTPCode = atoi(pszLine + 9);
442 130 : else if (EQUALN(pszLine, "Content-Length: ", 16))
443 : psStruct->nContentLength = CPLScanUIntBig(pszLine + 16,
444 14 : strlen(pszLine + 16));
445 116 : else if (EQUALN(pszLine, "Content-Range: ", 15))
446 6 : psStruct->bFoundContentRange = TRUE;
447 :
448 : /*if (nSize > 2 && pszLine[nSize - 2] == '\r' &&
449 : pszLine[nSize - 1] == '\n')
450 : {
451 : pszLine[nSize - 2] = 0;
452 : CPLDebug("VSICURL", "%s", pszLine);
453 : pszLine[nSize - 2] = '\r';
454 : }*/
455 :
456 145 : if (pszLine[0] == '\r' || pszLine[0] == '\n')
457 : {
458 15 : psStruct->bIsInHeader = FALSE;
459 :
460 : /* Detect servers that don't support range downloading */
461 15 : if (psStruct->nHTTPCode == 200 &&
462 : !psStruct->bMultiRange &&
463 : !psStruct->bFoundContentRange &&
464 : (psStruct->nStartOffset != 0 || psStruct->nContentLength > 10 *
465 : (psStruct->nEndOffset - psStruct->nStartOffset + 1)))
466 : {
467 : CPLError(CE_Failure, CPLE_AppDefined,
468 0 : "Range downloading not supported by this server !");
469 0 : psStruct->bError = TRUE;
470 0 : return 0;
471 : }
472 : }
473 : }
474 457 : psStruct->nSize += nSize;
475 457 : return nmemb;
476 : }
477 : else
478 : {
479 0 : return 0;
480 : }
481 : }
482 :
483 :
484 : /************************************************************************/
485 : /* GetFileSize() */
486 : /************************************************************************/
487 :
488 53 : vsi_l_offset VSICurlHandle::GetFileSize()
489 : {
490 : WriteFuncStruct sWriteFuncData;
491 :
492 53 : if (bHastComputedFileSize)
493 47 : return fileSize;
494 :
495 6 : bHastComputedFileSize = TRUE;
496 :
497 : /* Consider that only the files whose extension ends up with one that is */
498 : /* listed in CPL_VSIL_CURL_ALLOWED_EXTENSIONS exist on the server */
499 : /* This can speeds up dramatically open experience, in case the server */
500 : /* cannot return a file list */
501 : /* For example : */
502 : /* gdalinfo --config CPL_VSIL_CURL_ALLOWED_EXTENSIONS ".tif" /vsicurl/http://igskmncngs506.cr.usgs.gov/gmted/Global_tiles_GMTED/075darcsec/bln/W030/30N030W_20101117_gmted_bln075.tif */
503 : const char* pszAllowedExtensions =
504 6 : CPLGetConfigOption("CPL_VSIL_CURL_ALLOWED_EXTENSIONS", NULL);
505 6 : if (pszAllowedExtensions)
506 : {
507 1 : char** papszExtensions = CSLTokenizeString2( pszAllowedExtensions, ", ", 0 );
508 1 : int nURLLen = strlen(pszURL);
509 1 : int bFound = FALSE;
510 1 : for(int i=0;papszExtensions[i] != NULL;i++)
511 : {
512 1 : int nExtensionLen = strlen(papszExtensions[i]);
513 2 : if (nURLLen > nExtensionLen &&
514 1 : EQUAL(pszURL + nURLLen - nExtensionLen, papszExtensions[i]))
515 : {
516 1 : bFound = TRUE;
517 1 : break;
518 : }
519 : }
520 :
521 1 : if (!bFound)
522 : {
523 0 : eExists = EXIST_NO;
524 0 : fileSize = 0;
525 :
526 0 : CachedFileProp* cachedFileProp = poFS->GetCachedFileProp(pszURL);
527 0 : cachedFileProp->bHastComputedFileSize = TRUE;
528 0 : cachedFileProp->fileSize = fileSize;
529 0 : cachedFileProp->eExists = eExists;
530 :
531 0 : CSLDestroy(papszExtensions);
532 :
533 0 : return 0;
534 : }
535 :
536 1 : CSLDestroy(papszExtensions);
537 : }
538 :
539 : #if LIBCURL_VERSION_NUM < 0x070B00
540 : /* Curl 7.10.X doesn't manage to unset the CURLOPT_RANGE that would have been */
541 : /* previously set, so we have to reinit the connection handle */
542 : poFS->GetCurlHandleFor("");
543 : #endif
544 6 : CURL* hCurlHandle = poFS->GetCurlHandleFor(pszURL);
545 :
546 6 : VSICurlSetOptions(hCurlHandle, pszURL);
547 6 : curl_easy_setopt(hCurlHandle, CURLOPT_NOBODY, 1);
548 6 : curl_easy_setopt(hCurlHandle, CURLOPT_HTTPGET, 0);
549 6 : curl_easy_setopt(hCurlHandle, CURLOPT_HEADER, 1);
550 :
551 : /* We need that otherwise OSGEO4W's libcurl issue a dummy range request */
552 : /* when doing a HEAD when recycling connections */
553 6 : curl_easy_setopt(hCurlHandle, CURLOPT_RANGE, NULL);
554 :
555 : /* Bug with older curl versions (<=7.16.4) and FTP. See http://curl.haxx.se/mail/lib-2007-08/0312.html */
556 6 : VSICURLInitWriteFuncStruct(&sWriteFuncData);
557 6 : curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData);
558 6 : curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION, VSICurlHandleWriteFunc);
559 :
560 : char szCurlErrBuf[CURL_ERROR_SIZE+1];
561 6 : szCurlErrBuf[0] = '\0';
562 6 : curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER, szCurlErrBuf );
563 :
564 6 : double dfSize = 0;
565 6 : curl_easy_perform(hCurlHandle);
566 :
567 6 : eExists = EXIST_UNKNOWN;
568 :
569 6 : if (strncmp(pszURL, "ftp", 3) == 0)
570 : {
571 0 : if (sWriteFuncData.pBuffer != NULL &&
572 : strncmp(sWriteFuncData.pBuffer, "Content-Length: ", strlen( "Content-Length: ")) == 0)
573 : {
574 0 : const char* pszBuffer = sWriteFuncData.pBuffer + strlen("Content-Length: ");
575 0 : eExists = EXIST_YES;
576 0 : fileSize = CPLScanUIntBig(pszBuffer, sWriteFuncData.nSize - strlen("Content-Length: "));
577 : if (ENABLE_DEBUG)
578 : CPLDebug("VSICURL", "GetFileSize(%s)=" CPL_FRMT_GUIB,
579 0 : pszURL, fileSize);
580 : }
581 : }
582 :
583 6 : if (eExists != EXIST_YES)
584 : {
585 6 : CURLcode code = curl_easy_getinfo(hCurlHandle, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &dfSize );
586 6 : if (code == 0)
587 : {
588 6 : eExists = EXIST_YES;
589 6 : if (dfSize < 0)
590 0 : fileSize = 0;
591 : else
592 6 : fileSize = (GUIntBig)dfSize;
593 : }
594 : else
595 : {
596 0 : eExists = EXIST_NO;
597 0 : fileSize = 0;
598 0 : CPLError(CE_Failure, CPLE_AppDefined, "VSICurlHandle::GetFileSize failed");
599 : }
600 :
601 6 : long response_code = 0;
602 6 : curl_easy_getinfo(hCurlHandle, CURLINFO_HTTP_CODE, &response_code);
603 6 : if (response_code != 200)
604 : {
605 0 : eExists = EXIST_NO;
606 0 : fileSize = 0;
607 : }
608 :
609 : /* Try to guess if this is a directory. Generally if this is a directory, */
610 : /* curl will retry with an URL with slash added */
611 6 : char *pszEffectiveURL = NULL;
612 6 : curl_easy_getinfo(hCurlHandle, CURLINFO_EFFECTIVE_URL, &pszEffectiveURL);
613 12 : if (pszEffectiveURL != NULL && strncmp(pszURL, pszEffectiveURL, strlen(pszURL)) == 0 &&
614 6 : pszEffectiveURL[strlen(pszURL)] == '/')
615 : {
616 1 : eExists = EXIST_YES;
617 1 : fileSize = 0;
618 1 : bIsDirectory = TRUE;
619 : }
620 :
621 : if (ENABLE_DEBUG)
622 : CPLDebug("VSICURL", "GetFileSize(%s)=" CPL_FRMT_GUIB " response_code=%d",
623 6 : pszURL, fileSize, (int)response_code);
624 : }
625 :
626 6 : CPLFree(sWriteFuncData.pBuffer);
627 :
628 6 : CachedFileProp* cachedFileProp = poFS->GetCachedFileProp(pszURL);
629 6 : cachedFileProp->bHastComputedFileSize = TRUE;
630 6 : cachedFileProp->fileSize = fileSize;
631 6 : cachedFileProp->eExists = eExists;
632 6 : cachedFileProp->bIsDirectory = bIsDirectory;
633 :
634 6 : return fileSize;
635 : }
636 :
637 : /************************************************************************/
638 : /* Exists() */
639 : /************************************************************************/
640 :
641 642 : int VSICurlHandle::Exists()
642 : {
643 642 : if (eExists == EXIST_UNKNOWN)
644 4 : GetFileSize();
645 642 : return eExists == EXIST_YES;
646 : }
647 :
648 : /************************************************************************/
649 : /* Tell() */
650 : /************************************************************************/
651 :
652 106 : vsi_l_offset VSICurlHandle::Tell()
653 : {
654 106 : return curOffset;
655 : }
656 :
657 : /************************************************************************/
658 : /* DownloadRegion() */
659 : /************************************************************************/
660 :
661 17 : int VSICurlHandle::DownloadRegion(vsi_l_offset startOffset, int nBlocks)
662 : {
663 : WriteFuncStruct sWriteFuncData;
664 : WriteFuncStruct sWriteFuncHeaderData;
665 :
666 17 : CachedFileProp* cachedFileProp = poFS->GetCachedFileProp(pszURL);
667 17 : if (cachedFileProp->eExists == EXIST_NO)
668 0 : return FALSE;
669 :
670 17 : CURL* hCurlHandle = poFS->GetCurlHandleFor(pszURL);
671 17 : VSICurlSetOptions(hCurlHandle, pszURL);
672 :
673 17 : VSICURLInitWriteFuncStruct(&sWriteFuncData);
674 17 : curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData);
675 17 : curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION, VSICurlHandleWriteFunc);
676 :
677 17 : VSICURLInitWriteFuncStruct(&sWriteFuncHeaderData);
678 17 : curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA, &sWriteFuncHeaderData);
679 17 : curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION, VSICurlHandleWriteFunc);
680 17 : sWriteFuncHeaderData.bIsHTTP = strncmp(pszURL, "http", 4) == 0;
681 17 : sWriteFuncHeaderData.nStartOffset = startOffset;
682 17 : sWriteFuncHeaderData.nEndOffset = startOffset + nBlocks * DOWNLOAD_CHUNCK_SIZE - 1;
683 :
684 : char rangeStr[512];
685 17 : sprintf(rangeStr, CPL_FRMT_GUIB "-" CPL_FRMT_GUIB, startOffset, startOffset + nBlocks * DOWNLOAD_CHUNCK_SIZE - 1);
686 :
687 : if (ENABLE_DEBUG)
688 17 : CPLDebug("VSICURL", "Downloading %s (%s)...", rangeStr, pszURL);
689 :
690 17 : curl_easy_setopt(hCurlHandle, CURLOPT_RANGE, rangeStr);
691 :
692 : char szCurlErrBuf[CURL_ERROR_SIZE+1];
693 17 : szCurlErrBuf[0] = '\0';
694 17 : curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER, szCurlErrBuf );
695 :
696 17 : curl_easy_perform(hCurlHandle);
697 :
698 17 : curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, NULL);
699 17 : curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION, NULL);
700 17 : curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA, NULL);
701 17 : curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION, NULL);
702 :
703 17 : long response_code = 0;
704 17 : curl_easy_getinfo(hCurlHandle, CURLINFO_HTTP_CODE, &response_code);
705 :
706 17 : char *content_type = 0;
707 17 : curl_easy_getinfo(hCurlHandle, CURLINFO_CONTENT_TYPE, &content_type);
708 :
709 : if (ENABLE_DEBUG)
710 17 : CPLDebug("VSICURL", "Got reponse_code=%ld", response_code);
711 :
712 26 : if ((response_code != 200 && response_code != 206 &&
713 : response_code != 225 && response_code != 226 && response_code != 426) || sWriteFuncHeaderData.bError)
714 : {
715 0 : if (response_code >= 400 && szCurlErrBuf[0] != '\0')
716 : {
717 0 : if (strcmp(szCurlErrBuf, "Couldn't use REST") == 0)
718 : CPLError(CE_Failure, CPLE_AppDefined, "%d: %s, %s",
719 : (int)response_code, szCurlErrBuf,
720 0 : "Range downloading not supported by this server !");
721 : else
722 0 : CPLError(CE_Failure, CPLE_AppDefined, "%d: %s", (int)response_code, szCurlErrBuf);
723 : }
724 0 : if (!bHastComputedFileSize && startOffset == 0)
725 : {
726 0 : cachedFileProp->bHastComputedFileSize = bHastComputedFileSize = TRUE;
727 0 : cachedFileProp->fileSize = fileSize = 0;
728 0 : cachedFileProp->eExists = eExists = EXIST_NO;
729 : }
730 0 : CPLFree(sWriteFuncData.pBuffer);
731 0 : CPLFree(sWriteFuncHeaderData.pBuffer);
732 0 : return FALSE;
733 : }
734 :
735 17 : if (!bHastComputedFileSize && sWriteFuncHeaderData.pBuffer)
736 : {
737 : /* Try to retrieve the filesize from the HTTP headers */
738 : /* if in the form : "Content-Range: bytes x-y/filesize" */
739 8 : char* pszContentRange = strstr(sWriteFuncHeaderData.pBuffer, "Content-Range: bytes ");
740 8 : if (pszContentRange)
741 : {
742 3 : char* pszEOL = strchr(pszContentRange, '\n');
743 3 : if (pszEOL)
744 : {
745 3 : *pszEOL = 0;
746 3 : pszEOL = strchr(pszContentRange, '\r');
747 3 : if (pszEOL)
748 3 : *pszEOL = 0;
749 3 : char* pszSlash = strchr(pszContentRange, '/');
750 3 : if (pszSlash)
751 : {
752 3 : pszSlash ++;
753 3 : fileSize = CPLScanUIntBig(pszSlash, strlen(pszSlash));
754 : }
755 : }
756 : }
757 5 : else if (strncmp(pszURL, "ftp", 3) == 0)
758 : {
759 : /* Parse 213 answer for FTP protocol */
760 0 : char* pszSize = strstr(sWriteFuncHeaderData.pBuffer, "213 ");
761 0 : if (pszSize)
762 : {
763 0 : pszSize += 4;
764 0 : char* pszEOL = strchr(pszSize, '\n');
765 0 : if (pszEOL)
766 : {
767 0 : *pszEOL = 0;
768 0 : pszEOL = strchr(pszSize, '\r');
769 0 : if (pszEOL)
770 0 : *pszEOL = 0;
771 :
772 0 : fileSize = CPLScanUIntBig(pszSize, strlen(pszSize));
773 : }
774 : }
775 : }
776 :
777 8 : if (fileSize != 0)
778 : {
779 3 : eExists = EXIST_YES;
780 :
781 : if (ENABLE_DEBUG)
782 : CPLDebug("VSICURL", "GetFileSize(%s)=" CPL_FRMT_GUIB " response_code=%d",
783 3 : pszURL, fileSize, (int)response_code);
784 :
785 3 : bHastComputedFileSize = cachedFileProp->bHastComputedFileSize = TRUE;
786 3 : cachedFileProp->fileSize = fileSize;
787 3 : cachedFileProp->eExists = eExists;
788 : }
789 : }
790 :
791 17 : lastDownloadedOffset = startOffset + nBlocks * DOWNLOAD_CHUNCK_SIZE;
792 :
793 17 : char* pBuffer = sWriteFuncData.pBuffer;
794 17 : int nSize = sWriteFuncData.nSize;
795 :
796 17 : if (nSize > nBlocks * DOWNLOAD_CHUNCK_SIZE)
797 : {
798 : if (ENABLE_DEBUG)
799 : CPLDebug("VSICURL", "Got more data than expected : %d instead of %d",
800 0 : nSize, nBlocks * DOWNLOAD_CHUNCK_SIZE);
801 : }
802 :
803 52 : while(nSize > 0)
804 : {
805 : //if (ENABLE_DEBUG)
806 : // CPLDebug("VSICURL", "Add region %d - %d", startOffset, MIN(DOWNLOAD_CHUNCK_SIZE, nSize));
807 18 : poFS->AddRegion(pszURL, startOffset, MIN(DOWNLOAD_CHUNCK_SIZE, nSize), pBuffer);
808 18 : startOffset += DOWNLOAD_CHUNCK_SIZE;
809 18 : pBuffer += DOWNLOAD_CHUNCK_SIZE;
810 18 : nSize -= DOWNLOAD_CHUNCK_SIZE;
811 : }
812 :
813 17 : CPLFree(sWriteFuncData.pBuffer);
814 17 : CPLFree(sWriteFuncHeaderData.pBuffer);
815 :
816 17 : return TRUE;
817 : }
818 :
819 : /************************************************************************/
820 : /* Read() */
821 : /************************************************************************/
822 :
823 6020 : size_t VSICurlHandle::Read( void *pBuffer, size_t nSize, size_t nMemb )
824 : {
825 6020 : size_t nBufferRequestSize = nSize * nMemb;
826 6020 : if (nBufferRequestSize == 0)
827 0 : return 0;
828 :
829 : //CPLDebug("VSICURL", "offset=%d, size=%d", (int)curOffset, (int)nBufferRequestSize);
830 :
831 6020 : vsi_l_offset iterOffset = curOffset;
832 18051 : while (nBufferRequestSize)
833 : {
834 6022 : const CachedRegion* psRegion = poFS->GetRegion(pszURL, iterOffset);
835 6022 : if (psRegion == NULL)
836 : {
837 : vsi_l_offset nOffsetToDownload =
838 17 : (iterOffset / DOWNLOAD_CHUNCK_SIZE) * DOWNLOAD_CHUNCK_SIZE;
839 :
840 17 : if (nOffsetToDownload == lastDownloadedOffset)
841 : {
842 : /* In case of consecutive reads (of small size), we use a */
843 : /* heuristic that we will read the file sequentially, so */
844 : /* we double the requested size to decrease the number of */
845 : /* client/server roundtrips. */
846 1 : if (nBlocksToDownload < 100)
847 1 : nBlocksToDownload *= 2;
848 : }
849 : else
850 : {
851 : /* Random reads. Cancel the above heuristics */
852 16 : nBlocksToDownload = 1;
853 : }
854 :
855 : /* Ensure that we will request at least the number of blocks */
856 : /* to satisfy the remaining buffer size to read */
857 : vsi_l_offset nEndOffsetToDownload =
858 17 : ((iterOffset + nBufferRequestSize) / DOWNLOAD_CHUNCK_SIZE) * DOWNLOAD_CHUNCK_SIZE;
859 : int nMinBlocksToDownload = 1 + (int)
860 17 : ((nEndOffsetToDownload - nOffsetToDownload) / DOWNLOAD_CHUNCK_SIZE);
861 17 : if (nBlocksToDownload < nMinBlocksToDownload)
862 0 : nBlocksToDownload = nMinBlocksToDownload;
863 :
864 : int i;
865 : /* Avoid reading already cached data */
866 18 : for(i=1;i<nBlocksToDownload;i++)
867 : {
868 1 : if (poFS->GetRegion(pszURL, nOffsetToDownload + i * DOWNLOAD_CHUNCK_SIZE) != NULL)
869 : {
870 0 : nBlocksToDownload = i;
871 0 : break;
872 : }
873 : }
874 :
875 17 : if (DownloadRegion(nOffsetToDownload, nBlocksToDownload) == FALSE)
876 : {
877 0 : bEOF = TRUE;
878 0 : return 0;
879 : }
880 17 : psRegion = poFS->GetRegion(pszURL, iterOffset);
881 : }
882 6022 : if (psRegion == NULL || psRegion->pData == NULL)
883 : {
884 0 : bEOF = TRUE;
885 0 : return 0;
886 : }
887 6022 : int nToCopy = (int) MIN(nBufferRequestSize, psRegion->nSize - (iterOffset - psRegion->nFileOffsetStart));
888 : memcpy(pBuffer, psRegion->pData + iterOffset - psRegion->nFileOffsetStart,
889 6022 : nToCopy);
890 6022 : pBuffer = (char*) pBuffer + nToCopy;
891 6022 : iterOffset += nToCopy;
892 6022 : nBufferRequestSize -= nToCopy;
893 6022 : if (psRegion->nSize != DOWNLOAD_CHUNCK_SIZE && nBufferRequestSize != 0)
894 : {
895 11 : break;
896 : }
897 : }
898 :
899 6020 : size_t ret = (size_t) ((iterOffset - curOffset) / nSize);
900 6020 : if (ret != nMemb)
901 11 : bEOF = TRUE;
902 :
903 6020 : curOffset = iterOffset;
904 :
905 6020 : return ret;
906 : }
907 :
908 :
909 : /************************************************************************/
910 : /* ReadMultiRange() */
911 : /************************************************************************/
912 :
913 1 : int VSICurlHandle::ReadMultiRange( int nRanges, void ** ppData,
914 : const vsi_l_offset* panOffsets,
915 : const size_t* panSizes )
916 : {
917 : WriteFuncStruct sWriteFuncData;
918 : WriteFuncStruct sWriteFuncHeaderData;
919 :
920 1 : CachedFileProp* cachedFileProp = poFS->GetCachedFileProp(pszURL);
921 1 : if (cachedFileProp->eExists == EXIST_NO)
922 0 : return -1;
923 :
924 1 : CPLString osRanges, osFirstRange, osLastRange;
925 : int i;
926 1 : int nMergedRanges = 0;
927 1 : vsi_l_offset nTotalReqSize = 0;
928 129 : for(i=0;i<nRanges;i++)
929 : {
930 128 : CPLString osCurRange;
931 128 : if (i != 0)
932 127 : osRanges.append(",");
933 128 : osCurRange = CPLSPrintf(CPL_FRMT_GUIB "-", panOffsets[i]);
934 256 : while (i + 1 < nRanges && panOffsets[i] + panSizes[i] == panOffsets[i+1])
935 : {
936 0 : nTotalReqSize += panSizes[i];
937 0 : i ++;
938 : }
939 128 : nTotalReqSize += panSizes[i];
940 128 : osCurRange.append(CPLSPrintf(CPL_FRMT_GUIB, panOffsets[i] + panSizes[i]-1));
941 128 : nMergedRanges ++;
942 :
943 128 : osRanges += osCurRange;
944 :
945 128 : if (nMergedRanges == 1)
946 1 : osFirstRange = osCurRange;
947 128 : osLastRange = osCurRange;
948 : }
949 :
950 1 : const char* pszMaxRanges = CPLGetConfigOption("CPL_VSIL_CURL_MAX_RANGES", "250");
951 1 : int nMaxRanges = atoi(pszMaxRanges);
952 1 : if (nMaxRanges <= 0)
953 0 : nMaxRanges = 250;
954 1 : if (nMergedRanges > nMaxRanges)
955 : {
956 0 : int nHalf = nRanges / 2;
957 0 : int nRet = ReadMultiRange(nHalf, ppData, panOffsets, panSizes);
958 0 : if (nRet != 0)
959 0 : return nRet;
960 0 : return ReadMultiRange(nRanges - nHalf, ppData + nHalf, panOffsets + nHalf, panSizes + nHalf);
961 : }
962 :
963 1 : CURL* hCurlHandle = poFS->GetCurlHandleFor(pszURL);
964 1 : VSICurlSetOptions(hCurlHandle, pszURL);
965 :
966 1 : VSICURLInitWriteFuncStruct(&sWriteFuncData);
967 1 : curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData);
968 1 : curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION, VSICurlHandleWriteFunc);
969 :
970 1 : VSICURLInitWriteFuncStruct(&sWriteFuncHeaderData);
971 1 : curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA, &sWriteFuncHeaderData);
972 1 : curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION, VSICurlHandleWriteFunc);
973 1 : sWriteFuncHeaderData.bIsHTTP = strncmp(pszURL, "http", 4) == 0;
974 1 : sWriteFuncHeaderData.bMultiRange = nMergedRanges > 1;
975 1 : if (nMergedRanges == 1)
976 : {
977 0 : sWriteFuncHeaderData.nStartOffset = panOffsets[0];
978 0 : sWriteFuncHeaderData.nEndOffset = panOffsets[0] + nTotalReqSize-1;
979 : }
980 :
981 : if (ENABLE_DEBUG)
982 : {
983 1 : if (nMergedRanges == 1)
984 0 : CPLDebug("VSICURL", "Downloading %s (%s)...", osRanges.c_str(), pszURL);
985 : else
986 : CPLDebug("VSICURL", "Downloading %s, ..., %s (" CPL_FRMT_GUIB " bytes, %s)...",
987 1 : osFirstRange.c_str(), osLastRange.c_str(), (GUIntBig)nTotalReqSize, pszURL);
988 : }
989 :
990 1 : curl_easy_setopt(hCurlHandle, CURLOPT_RANGE, osRanges.c_str());
991 :
992 : char szCurlErrBuf[CURL_ERROR_SIZE+1];
993 1 : szCurlErrBuf[0] = '\0';
994 1 : curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER, szCurlErrBuf );
995 :
996 1 : curl_easy_perform(hCurlHandle);
997 :
998 1 : curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, NULL);
999 1 : curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION, NULL);
1000 1 : curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA, NULL);
1001 1 : curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION, NULL);
1002 :
1003 1 : long response_code = 0;
1004 1 : curl_easy_getinfo(hCurlHandle, CURLINFO_HTTP_CODE, &response_code);
1005 :
1006 1 : char *content_type = 0;
1007 1 : curl_easy_getinfo(hCurlHandle, CURLINFO_CONTENT_TYPE, &content_type);
1008 :
1009 1 : if ((response_code != 200 && response_code != 206 &&
1010 : response_code != 225 && response_code != 226 && response_code != 426) || sWriteFuncHeaderData.bError)
1011 : {
1012 0 : if (response_code >= 400 && szCurlErrBuf[0] != '\0')
1013 : {
1014 0 : if (strcmp(szCurlErrBuf, "Couldn't use REST") == 0)
1015 : CPLError(CE_Failure, CPLE_AppDefined, "%d: %s, %s",
1016 : (int)response_code, szCurlErrBuf,
1017 0 : "Range downloading not supported by this server !");
1018 : else
1019 0 : CPLError(CE_Failure, CPLE_AppDefined, "%d: %s", (int)response_code, szCurlErrBuf);
1020 : }
1021 : /*
1022 : if (!bHastComputedFileSize && startOffset == 0)
1023 : {
1024 : cachedFileProp->bHastComputedFileSize = bHastComputedFileSize = TRUE;
1025 : cachedFileProp->fileSize = fileSize = 0;
1026 : cachedFileProp->eExists = eExists = EXIST_NO;
1027 : }
1028 : */
1029 0 : CPLFree(sWriteFuncData.pBuffer);
1030 0 : CPLFree(sWriteFuncHeaderData.pBuffer);
1031 0 : return -1;
1032 : }
1033 :
1034 1 : char* pBuffer = sWriteFuncData.pBuffer;
1035 1 : int nSize = sWriteFuncData.nSize;
1036 :
1037 1 : int nRet = -1;
1038 : char* pszBoundary;
1039 1 : CPLString osBoundary;
1040 : char *pszNext;
1041 1 : int iRange = 0;
1042 1 : int iPart = 0;
1043 : char* pszEOL;
1044 :
1045 : /* -------------------------------------------------------------------- */
1046 : /* No multipart if a single range has been requested */
1047 : /* -------------------------------------------------------------------- */
1048 :
1049 1 : if (nMergedRanges == 1)
1050 : {
1051 0 : int nAccSize = 0;
1052 0 : if ((vsi_l_offset)nSize < nTotalReqSize)
1053 0 : goto end;
1054 :
1055 0 : for(i=0;i<nRanges;i++)
1056 : {
1057 0 : memcpy(ppData[i], pBuffer + nAccSize, panSizes[i]);
1058 0 : nAccSize += panSizes[i];
1059 : }
1060 :
1061 0 : nRet = 0;
1062 0 : goto end;
1063 : }
1064 :
1065 : /* -------------------------------------------------------------------- */
1066 : /* Extract boundary name */
1067 : /* -------------------------------------------------------------------- */
1068 :
1069 : pszBoundary = strstr(sWriteFuncHeaderData.pBuffer,
1070 1 : "Content-Type: multipart/byteranges; boundary=");
1071 1 : if( pszBoundary == NULL )
1072 : {
1073 : CPLError( CE_Failure, CPLE_AppDefined, "Could not find '%s'",
1074 0 : "Content-Type: multipart/byteranges; boundary=" );
1075 0 : goto end;
1076 : }
1077 :
1078 1 : pszBoundary += strlen( "Content-Type: multipart/byteranges; boundary=" );
1079 :
1080 1 : pszEOL = strchr(pszBoundary, '\r');
1081 1 : if (pszEOL)
1082 1 : *pszEOL = 0;
1083 1 : pszEOL = strchr(pszBoundary, '\n');
1084 1 : if (pszEOL)
1085 0 : *pszEOL = 0;
1086 :
1087 : /* Remove optional double-quote character around boundary name */
1088 1 : if (pszBoundary[0] == '"')
1089 : {
1090 0 : pszBoundary ++;
1091 0 : char* pszLastDoubleQuote = strrchr(pszBoundary, '"');
1092 0 : if (pszLastDoubleQuote)
1093 0 : *pszLastDoubleQuote = 0;
1094 : }
1095 :
1096 1 : osBoundary = "--";
1097 1 : osBoundary += pszBoundary;
1098 :
1099 : /* -------------------------------------------------------------------- */
1100 : /* Find the start of the first chunk. */
1101 : /* -------------------------------------------------------------------- */
1102 1 : pszNext = strstr(pBuffer,osBoundary.c_str());
1103 1 : if( pszNext == NULL )
1104 : {
1105 0 : CPLError( CE_Failure, CPLE_AppDefined, "No parts found." );
1106 0 : goto end;
1107 : }
1108 :
1109 1 : pszNext += strlen(osBoundary);
1110 2 : while( *pszNext != '\n' && *pszNext != '\r' && *pszNext != '\0' )
1111 0 : pszNext++;
1112 1 : if( *pszNext == '\r' )
1113 1 : pszNext++;
1114 1 : if( *pszNext == '\n' )
1115 1 : pszNext++;
1116 :
1117 : /* -------------------------------------------------------------------- */
1118 : /* Loop over parts... */
1119 : /* -------------------------------------------------------------------- */
1120 129 : while( iPart < nRanges )
1121 : {
1122 : /* -------------------------------------------------------------------- */
1123 : /* Collect headers. */
1124 : /* -------------------------------------------------------------------- */
1125 128 : int bExpectedRange = FALSE;
1126 :
1127 512 : while( *pszNext != '\n' && *pszNext != '\r' && *pszNext != '\0' )
1128 : {
1129 256 : char *pszEOL = strstr(pszNext,"\n");
1130 :
1131 256 : if( pszEOL == NULL )
1132 : {
1133 : CPLError(CE_Failure, CPLE_AppDefined,
1134 0 : "Error while parsing multipart content (at line %d)", __LINE__);
1135 0 : goto end;
1136 : }
1137 :
1138 256 : *pszEOL = '\0';
1139 256 : int bRestoreAntislashR = FALSE;
1140 256 : if (pszEOL - pszNext > 1 && pszEOL[-1] == '\r')
1141 : {
1142 256 : bRestoreAntislashR = TRUE;
1143 256 : pszEOL[-1] = '\0';
1144 : }
1145 :
1146 256 : if (EQUALN(pszNext, "Content-Range: bytes ", strlen("Content-Range: bytes ")))
1147 : {
1148 128 : bExpectedRange = TRUE; /* FIXME */
1149 : }
1150 :
1151 256 : if (bRestoreAntislashR)
1152 256 : pszEOL[-1] = '\r';
1153 256 : *pszEOL = '\n';
1154 :
1155 256 : pszNext = pszEOL + 1;
1156 : }
1157 :
1158 128 : if (!bExpectedRange)
1159 : {
1160 : CPLError(CE_Failure, CPLE_AppDefined,
1161 0 : "Error while parsing multipart content (at line %d)", __LINE__);
1162 0 : goto end;
1163 : }
1164 :
1165 128 : if( *pszNext == '\r' )
1166 128 : pszNext++;
1167 128 : if( *pszNext == '\n' )
1168 128 : pszNext++;
1169 :
1170 : /* -------------------------------------------------------------------- */
1171 : /* Work out the data block size. */
1172 : /* -------------------------------------------------------------------- */
1173 128 : size_t nBytesAvail = nSize - (pszNext - pBuffer);
1174 :
1175 0 : while(TRUE)
1176 : {
1177 128 : if (nBytesAvail < panSizes[iRange])
1178 : {
1179 : CPLError(CE_Failure, CPLE_AppDefined,
1180 0 : "Error while parsing multipart content (at line %d)", __LINE__);
1181 0 : goto end;
1182 : }
1183 :
1184 128 : memcpy(ppData[iRange], pszNext, panSizes[iRange]);
1185 128 : pszNext += panSizes[iRange];
1186 128 : nBytesAvail -= panSizes[iRange];
1187 509 : if( iRange + 1 < nRanges &&
1188 381 : panOffsets[iRange] + panSizes[iRange] == panOffsets[iRange + 1] )
1189 : {
1190 0 : iRange++;
1191 : }
1192 : else
1193 : break;
1194 : }
1195 :
1196 128 : iPart ++;
1197 128 : iRange ++;
1198 :
1199 512 : while( nBytesAvail > 0
1200 : && (*pszNext != '-'
1201 : || strncmp(pszNext,osBoundary,strlen(osBoundary)) != 0) )
1202 : {
1203 256 : pszNext++;
1204 256 : nBytesAvail--;
1205 : }
1206 :
1207 128 : if( nBytesAvail == 0 )
1208 : {
1209 : CPLError(CE_Failure, CPLE_AppDefined,
1210 0 : "Error while parsing multipart content (at line %d)", __LINE__);
1211 0 : goto end;
1212 : }
1213 :
1214 128 : pszNext += strlen(osBoundary);
1215 128 : if( strncmp(pszNext,"--",2) == 0 )
1216 : {
1217 : /* End of multipart */
1218 1 : break;
1219 : }
1220 :
1221 127 : if( *pszNext == '\r' )
1222 127 : pszNext++;
1223 127 : if( *pszNext == '\n' )
1224 127 : pszNext++;
1225 : else
1226 : {
1227 : CPLError(CE_Failure, CPLE_AppDefined,
1228 0 : "Error while parsing multipart content (at line %d)", __LINE__);
1229 0 : goto end;
1230 : }
1231 : }
1232 :
1233 1 : if (iPart == nMergedRanges)
1234 1 : nRet = 0;
1235 : else
1236 : CPLError(CE_Failure, CPLE_AppDefined,
1237 0 : "Got only %d parts, where %d were expected", iPart, nMergedRanges);
1238 :
1239 : end:
1240 1 : CPLFree(sWriteFuncData.pBuffer);
1241 1 : CPLFree(sWriteFuncHeaderData.pBuffer);
1242 :
1243 1 : return nRet;
1244 : }
1245 :
1246 : /************************************************************************/
1247 : /* Write() */
1248 : /************************************************************************/
1249 :
1250 0 : size_t VSICurlHandle::Write( const void *pBuffer, size_t nSize, size_t nMemb )
1251 : {
1252 0 : return 0;
1253 : }
1254 :
1255 : /************************************************************************/
1256 : /* Eof() */
1257 : /************************************************************************/
1258 :
1259 :
1260 3 : int VSICurlHandle::Eof()
1261 : {
1262 3 : return bEOF;
1263 : }
1264 :
1265 : /************************************************************************/
1266 : /* Flush() */
1267 : /************************************************************************/
1268 :
1269 0 : int VSICurlHandle::Flush()
1270 : {
1271 0 : return 0;
1272 : }
1273 :
1274 : /************************************************************************/
1275 : /* Close() */
1276 : /************************************************************************/
1277 :
1278 48 : int VSICurlHandle::Close()
1279 : {
1280 48 : return 0;
1281 : }
1282 :
1283 :
1284 :
1285 :
1286 : /************************************************************************/
1287 : /* VSICurlFilesystemHandler() */
1288 : /************************************************************************/
1289 :
1290 647 : VSICurlFilesystemHandler::VSICurlFilesystemHandler()
1291 : {
1292 647 : hMutex = NULL;
1293 647 : papsRegions = NULL;
1294 647 : nRegions = 0;
1295 647 : bUseCacheDisk = CSLTestBoolean(CPLGetConfigOption("CPL_VSIL_CURL_USE_CACHE", "NO"));
1296 647 : }
1297 :
1298 : /************************************************************************/
1299 : /* ~VSICurlFilesystemHandler() */
1300 : /************************************************************************/
1301 :
1302 628 : VSICurlFilesystemHandler::~VSICurlFilesystemHandler()
1303 : {
1304 : int i;
1305 646 : for(i=0;i<nRegions;i++)
1306 : {
1307 18 : CPLFree(papsRegions[i]->pData);
1308 18 : CPLFree(papsRegions[i]);
1309 : }
1310 628 : CPLFree(papsRegions);
1311 :
1312 628 : std::map<CPLString, CachedFileProp*>::const_iterator iterCacheFileSize;
1313 :
1314 1128 : for( iterCacheFileSize = cacheFileSize.begin(); iterCacheFileSize != cacheFileSize.end(); iterCacheFileSize++ )
1315 : {
1316 500 : CPLFree(iterCacheFileSize->second);
1317 : }
1318 :
1319 628 : std::map<CPLString, CachedDirList*>::const_iterator iterCacheDirList;
1320 :
1321 643 : for( iterCacheDirList = cacheDirList.begin(); iterCacheDirList != cacheDirList.end(); iterCacheDirList++ )
1322 : {
1323 15 : CSLDestroy(iterCacheDirList->second->papszFileList);
1324 15 : CPLFree(iterCacheDirList->second);
1325 : }
1326 :
1327 628 : std::map<GIntBig, CachedConnection*>::const_iterator iterConnections;
1328 629 : for( iterConnections = mapConnections.begin(); iterConnections != mapConnections.end(); iterConnections++ )
1329 : {
1330 1 : curl_easy_cleanup(iterConnections->second->hCurlHandle);
1331 1 : delete iterConnections->second;
1332 : }
1333 :
1334 628 : if( hMutex != NULL )
1335 1 : CPLDestroyMutex( hMutex );
1336 628 : hMutex = NULL;
1337 628 : }
1338 :
1339 : /************************************************************************/
1340 : /* GetCurlHandleFor() */
1341 : /************************************************************************/
1342 :
1343 39 : CURL* VSICurlFilesystemHandler::GetCurlHandleFor(CPLString osURL)
1344 : {
1345 39 : CPLMutexHolder oHolder( &hMutex );
1346 :
1347 39 : std::map<GIntBig, CachedConnection*>::const_iterator iterConnections;
1348 :
1349 39 : iterConnections = mapConnections.find(CPLGetPID());
1350 39 : if (iterConnections == mapConnections.end())
1351 : {
1352 1 : CURL* hCurlHandle = curl_easy_init();
1353 1 : CachedConnection* psCachedConnection = new CachedConnection;
1354 1 : psCachedConnection->osURL = osURL;
1355 1 : psCachedConnection->hCurlHandle = hCurlHandle;
1356 1 : mapConnections[CPLGetPID()] = psCachedConnection;
1357 1 : return hCurlHandle;
1358 : }
1359 : else
1360 : {
1361 38 : CachedConnection* psCachedConnection = iterConnections->second;
1362 38 : if (osURL == psCachedConnection->osURL)
1363 10 : return psCachedConnection->hCurlHandle;
1364 :
1365 28 : const char* pszURL = osURL.c_str();
1366 28 : const char* pszEndOfServ = strchr(pszURL, '.');
1367 28 : if (pszEndOfServ != NULL)
1368 28 : pszEndOfServ = strchr(pszEndOfServ, '/');
1369 28 : if (pszEndOfServ == NULL)
1370 0 : pszURL = pszURL + strlen(pszURL);
1371 : int bReinitConnection = strncmp(psCachedConnection->osURL,
1372 28 : pszURL, pszEndOfServ-pszURL) != 0;
1373 :
1374 28 : if (bReinitConnection)
1375 : {
1376 6 : if (psCachedConnection->hCurlHandle)
1377 6 : curl_easy_cleanup(psCachedConnection->hCurlHandle);
1378 6 : psCachedConnection->hCurlHandle = curl_easy_init();
1379 : }
1380 28 : psCachedConnection->osURL = osURL;
1381 :
1382 28 : return psCachedConnection->hCurlHandle;
1383 0 : }
1384 : }
1385 :
1386 :
1387 : /************************************************************************/
1388 : /* GetRegionFromCacheDisk() */
1389 : /************************************************************************/
1390 :
1391 : const CachedRegion*
1392 0 : VSICurlFilesystemHandler::GetRegionFromCacheDisk(const char* pszURL,
1393 : vsi_l_offset nFileOffsetStart)
1394 : {
1395 0 : nFileOffsetStart = (nFileOffsetStart / DOWNLOAD_CHUNCK_SIZE) * DOWNLOAD_CHUNCK_SIZE;
1396 0 : VSILFILE* fp = VSIFOpenL(VSICurlGetCacheFileName(), "rb");
1397 0 : if (fp)
1398 : {
1399 0 : unsigned long pszURLHash = CPLHashSetHashStr(pszURL);
1400 : unsigned long pszURLHashCached;
1401 : vsi_l_offset nFileOffsetStartCached;
1402 : size_t nSizeCached;
1403 0 : while(TRUE)
1404 : {
1405 0 : if (VSIFReadL(&pszURLHashCached, 1, sizeof(unsigned long), fp) == 0)
1406 : break;
1407 0 : VSIFReadL(&nFileOffsetStartCached, 1, sizeof(vsi_l_offset), fp);
1408 0 : VSIFReadL(&nSizeCached, 1, sizeof(size_t), fp);
1409 0 : if (pszURLHash == pszURLHashCached &&
1410 : nFileOffsetStart == nFileOffsetStartCached)
1411 : {
1412 : if (ENABLE_DEBUG)
1413 0 : CPLDebug("VSICURL", "Got data at offset " CPL_FRMT_GUIB " from disk" , nFileOffsetStart);
1414 0 : if (nSizeCached)
1415 : {
1416 0 : char* pBuffer = (char*) CPLMalloc(nSizeCached);
1417 0 : VSIFReadL(pBuffer, 1, nSizeCached, fp);
1418 0 : AddRegion(pszURL, nFileOffsetStart, nSizeCached, pBuffer);
1419 0 : CPLFree(pBuffer);
1420 : }
1421 : else
1422 : {
1423 0 : AddRegion(pszURL, nFileOffsetStart, 0, NULL);
1424 : }
1425 0 : VSIFCloseL(fp);
1426 0 : return GetRegion(pszURL, nFileOffsetStart);
1427 : }
1428 : else
1429 : {
1430 0 : VSIFSeekL(fp, nSizeCached, SEEK_CUR);
1431 : }
1432 : }
1433 0 : VSIFCloseL(fp);
1434 : }
1435 0 : return NULL;
1436 : }
1437 :
1438 :
1439 : /************************************************************************/
1440 : /* AddRegionToCacheDisk() */
1441 : /************************************************************************/
1442 :
1443 0 : void VSICurlFilesystemHandler::AddRegionToCacheDisk(CachedRegion* psRegion)
1444 : {
1445 0 : VSILFILE* fp = VSIFOpenL(VSICurlGetCacheFileName(), "r+b");
1446 0 : if (fp)
1447 : {
1448 : unsigned long pszURLHashCached;
1449 : vsi_l_offset nFileOffsetStartCached;
1450 : size_t nSizeCached;
1451 0 : while(TRUE)
1452 : {
1453 0 : if (VSIFReadL(&pszURLHashCached, 1, sizeof(unsigned long), fp) == 0)
1454 : break;
1455 0 : VSIFReadL(&nFileOffsetStartCached, 1, sizeof(vsi_l_offset), fp);
1456 0 : VSIFReadL(&nSizeCached, 1, sizeof(size_t), fp);
1457 0 : if (psRegion->pszURLHash == pszURLHashCached &&
1458 : psRegion->nFileOffsetStart == nFileOffsetStartCached)
1459 : {
1460 0 : CPLAssert(psRegion->nSize == nSizeCached);
1461 0 : VSIFCloseL(fp);
1462 0 : return;
1463 : }
1464 : else
1465 : {
1466 0 : VSIFSeekL(fp, nSizeCached, SEEK_CUR);
1467 : }
1468 : }
1469 : }
1470 : else
1471 : {
1472 0 : fp = VSIFOpenL(VSICurlGetCacheFileName(), "wb");
1473 : }
1474 0 : if (fp)
1475 : {
1476 : if (ENABLE_DEBUG)
1477 0 : CPLDebug("VSICURL", "Write data at offset " CPL_FRMT_GUIB " to disk" , psRegion->nFileOffsetStart);
1478 0 : VSIFWriteL(&psRegion->pszURLHash, 1, sizeof(unsigned long), fp);
1479 0 : VSIFWriteL(&psRegion->nFileOffsetStart, 1, sizeof(vsi_l_offset), fp);
1480 0 : VSIFWriteL(&psRegion->nSize, 1, sizeof(size_t), fp);
1481 0 : if (psRegion->nSize)
1482 0 : VSIFWriteL(psRegion->pData, 1, psRegion->nSize, fp);
1483 :
1484 0 : VSIFCloseL(fp);
1485 : }
1486 0 : return;
1487 : }
1488 :
1489 :
1490 : /************************************************************************/
1491 : /* GetRegion() */
1492 : /************************************************************************/
1493 :
1494 6040 : const CachedRegion* VSICurlFilesystemHandler::GetRegion(const char* pszURL,
1495 : vsi_l_offset nFileOffsetStart)
1496 : {
1497 6040 : CPLMutexHolder oHolder( &hMutex );
1498 :
1499 6040 : unsigned long pszURLHash = CPLHashSetHashStr(pszURL);
1500 :
1501 6040 : nFileOffsetStart = (nFileOffsetStart / DOWNLOAD_CHUNCK_SIZE) * DOWNLOAD_CHUNCK_SIZE;
1502 : int i;
1503 6194 : for(i=0;i<nRegions;i++)
1504 : {
1505 6176 : CachedRegion* psRegion = papsRegions[i];
1506 6176 : if (psRegion->pszURLHash == pszURLHash &&
1507 : nFileOffsetStart == psRegion->nFileOffsetStart)
1508 : {
1509 6022 : memmove(papsRegions + 1, papsRegions, i * sizeof(CachedRegion*));
1510 6022 : papsRegions[0] = psRegion;
1511 6022 : return psRegion;
1512 : }
1513 : }
1514 18 : if (bUseCacheDisk)
1515 0 : return GetRegionFromCacheDisk(pszURL, nFileOffsetStart);
1516 18 : return NULL;
1517 : }
1518 :
1519 : /************************************************************************/
1520 : /* AddRegion() */
1521 : /************************************************************************/
1522 :
1523 18 : void VSICurlFilesystemHandler::AddRegion(const char* pszURL,
1524 : vsi_l_offset nFileOffsetStart,
1525 : size_t nSize,
1526 : const char *pData)
1527 : {
1528 18 : CPLMutexHolder oHolder( &hMutex );
1529 :
1530 18 : unsigned long pszURLHash = CPLHashSetHashStr(pszURL);
1531 :
1532 : CachedRegion* psRegion;
1533 18 : if (nRegions == N_MAX_REGIONS)
1534 : {
1535 0 : psRegion = papsRegions[N_MAX_REGIONS-1];
1536 0 : memmove(papsRegions + 1, papsRegions, (N_MAX_REGIONS-1) * sizeof(CachedRegion*));
1537 0 : papsRegions[0] = psRegion;
1538 0 : CPLFree(psRegion->pData);
1539 : }
1540 : else
1541 : {
1542 18 : papsRegions = (CachedRegion**) CPLRealloc(papsRegions, (nRegions + 1) * sizeof(CachedRegion*));
1543 18 : if (nRegions)
1544 17 : memmove(papsRegions + 1, papsRegions, nRegions * sizeof(CachedRegion*));
1545 18 : nRegions ++;
1546 18 : papsRegions[0] = psRegion = (CachedRegion*) CPLMalloc(sizeof(CachedRegion));
1547 : }
1548 :
1549 18 : psRegion->pszURLHash = pszURLHash;
1550 18 : psRegion->nFileOffsetStart = nFileOffsetStart;
1551 18 : psRegion->nSize = nSize;
1552 18 : psRegion->pData = (nSize) ? (char*) CPLMalloc(nSize) : NULL;
1553 18 : if (nSize)
1554 18 : memcpy(psRegion->pData, pData, nSize);
1555 :
1556 18 : if (bUseCacheDisk)
1557 0 : AddRegionToCacheDisk(psRegion);
1558 18 : }
1559 :
1560 : /************************************************************************/
1561 : /* GetCachedFileProp() */
1562 : /************************************************************************/
1563 :
1564 2467 : CachedFileProp* VSICurlFilesystemHandler::GetCachedFileProp(const char* pszURL)
1565 : {
1566 2467 : CPLMutexHolder oHolder( &hMutex );
1567 :
1568 2467 : CachedFileProp* cachedFileProp = cacheFileSize[pszURL];
1569 2467 : if (cachedFileProp == NULL)
1570 : {
1571 500 : cachedFileProp = (CachedFileProp*) CPLMalloc(sizeof(CachedFileProp));
1572 500 : cachedFileProp->eExists = EXIST_UNKNOWN;
1573 500 : cachedFileProp->bHastComputedFileSize = FALSE;
1574 500 : cachedFileProp->fileSize = 0;
1575 500 : cachedFileProp->bIsDirectory = FALSE;
1576 500 : cacheFileSize[pszURL] = cachedFileProp;
1577 : }
1578 :
1579 2467 : return cachedFileProp;
1580 : }
1581 :
1582 : /************************************************************************/
1583 : /* Open() */
1584 : /************************************************************************/
1585 :
1586 58 : VSIVirtualHandle* VSICurlFilesystemHandler::Open( const char *pszFilename,
1587 : const char *pszAccess)
1588 : {
1589 58 : if (strchr(pszAccess, 'w') != NULL ||
1590 : strchr(pszAccess, '+') != NULL)
1591 : {
1592 : CPLError(CE_Failure, CPLE_AppDefined,
1593 0 : "Only read-only mode is supported for /vsicurl");
1594 0 : return NULL;
1595 : }
1596 :
1597 : const char* pszOptionVal =
1598 58 : CPLGetConfigOption( "GDAL_DISABLE_READDIR_ON_OPEN", "NO" );
1599 : int bSkipReadDir = EQUAL(pszOptionVal, "EMPTY_DIR") ||
1600 58 : CSLTestBoolean(pszOptionVal);
1601 :
1602 58 : CPLString osFilename(pszFilename);
1603 58 : int bGotFileList = TRUE;
1604 58 : if (strchr(CPLGetFilename(osFilename), '.') != NULL &&
1605 : strncmp(CPLGetExtension(osFilename), "zip", 3) != 0 && !bSkipReadDir)
1606 : {
1607 29 : char** papszFileList = ReadDir(CPLGetDirname(osFilename), &bGotFileList);
1608 29 : int bFound = (VSICurlIsFileInList(papszFileList, CPLGetFilename(osFilename)) != -1);
1609 29 : CSLDestroy(papszFileList);
1610 29 : if (bGotFileList && !bFound)
1611 : {
1612 10 : return NULL;
1613 : }
1614 : }
1615 :
1616 48 : VSICurlHandle* poHandle = new VSICurlHandle( this, osFilename + strlen("/vsicurl/"));
1617 48 : if (!bGotFileList)
1618 : {
1619 : /* If we didn't get a filelist, check that the file really exists */
1620 0 : if (!poHandle->Exists())
1621 : {
1622 0 : delete poHandle;
1623 0 : poHandle = NULL;
1624 : }
1625 : }
1626 48 : return poHandle;
1627 : }
1628 :
1629 : /************************************************************************/
1630 : /* VSICurlParserFindEOL() */
1631 : /* */
1632 : /* Small helper function for ParseHTMLFileList() to find */
1633 : /* the end of a line in the directory listing. Either a <br> */
1634 : /* or newline. */
1635 : /************************************************************************/
1636 :
1637 615 : static char *VSICurlParserFindEOL( char *pszData )
1638 :
1639 : {
1640 33417 : while( *pszData != '\0' && *pszData != '\n' && !EQUALN(pszData,"<br>",4) )
1641 32187 : pszData++;
1642 :
1643 615 : if( *pszData == '\0' )
1644 14 : return NULL;
1645 : else
1646 601 : return pszData;
1647 : }
1648 :
1649 :
1650 : /************************************************************************/
1651 : /* VSICurlParseHTMLDateTimeFileSize() */
1652 : /************************************************************************/
1653 :
1654 : static const char* const apszMonths[] = { "January", "February", "March",
1655 : "April", "May", "June", "July",
1656 : "August", "September", "October",
1657 : "November", "December" };
1658 :
1659 503 : static int VSICurlParseHTMLDateTimeFileSize(const char* pszStr,
1660 : struct tm& brokendowntime,
1661 : GUIntBig& nFileSize,
1662 : GIntBig& mTime)
1663 : {
1664 : int iMonth;
1665 6487 : for(iMonth=0;iMonth<12;iMonth++)
1666 : {
1667 : char szMonth[32];
1668 5990 : szMonth[0] = '-';
1669 5990 : memcpy(szMonth + 1, apszMonths[iMonth], 3);
1670 5990 : szMonth[4] = '-';
1671 5990 : szMonth[5] = '\0';
1672 5990 : const char* pszMonthFound = strstr(pszStr, szMonth);
1673 5990 : if (pszMonthFound)
1674 : {
1675 : /* Format of Apache, like in http://download.osgeo.org/gdal/data/gtiff/ */
1676 : /* "17-May-2010 12:26" */
1677 18 : if (pszMonthFound - pszStr > 2 && strlen(pszMonthFound) > 15 &&
1678 12 : pszMonthFound[-2 + 11] == ' ' && pszMonthFound[-2 + 14] == ':')
1679 : {
1680 6 : pszMonthFound -= 2;
1681 6 : int nDay = atoi(pszMonthFound);
1682 6 : int nYear = atoi(pszMonthFound + 7);
1683 6 : int nHour = atoi(pszMonthFound + 12);
1684 6 : int nMin = atoi(pszMonthFound + 15);
1685 6 : if (nDay >= 1 && nDay <= 31 && nYear >= 1900 &&
1686 : nHour >= 0 && nHour <= 24 && nMin >= 0 && nMin < 60)
1687 : {
1688 6 : brokendowntime.tm_year = nYear - 1900;
1689 6 : brokendowntime.tm_mon = iMonth;
1690 6 : brokendowntime.tm_mday = nDay;
1691 6 : brokendowntime.tm_hour = nHour;
1692 6 : brokendowntime.tm_min = nMin;
1693 6 : mTime = CPLYMDHMSToUnixTime(&brokendowntime);
1694 :
1695 6 : return TRUE;
1696 : }
1697 : }
1698 0 : return FALSE;
1699 : }
1700 :
1701 : /* Microsoft IIS */
1702 5984 : szMonth[0] = ' ';
1703 5984 : strcpy(szMonth + 1, apszMonths[iMonth]);
1704 5984 : strcat(szMonth, " ");
1705 5984 : pszMonthFound = strstr(pszStr, szMonth);
1706 5984 : if (pszMonthFound)
1707 : {
1708 0 : int nLenMonth = strlen(apszMonths[iMonth]);
1709 0 : if (pszMonthFound - pszStr > 2 &&
1710 0 : pszMonthFound[-1] != ',' &&
1711 0 : pszMonthFound[-2] != ' ' &&
1712 : (int)strlen(pszMonthFound-2) > 2 + 1 + nLenMonth + 1 + 4 + 1 + 5 + 1 + 4)
1713 : {
1714 : /* Format of http://ortho.linz.govt.nz/tifs/1994_95/ */
1715 : /* " Friday, 21 April 2006 12:05 p.m. 48062343 m35a_fy_94_95.tif" */
1716 0 : pszMonthFound -= 2;
1717 0 : int nDay = atoi(pszMonthFound);
1718 0 : int nCurOffset = 2 + 1 + nLenMonth + 1;
1719 0 : int nYear = atoi(pszMonthFound + nCurOffset);
1720 0 : nCurOffset += 4 + 1;
1721 0 : int nHour = atoi(pszMonthFound + nCurOffset);
1722 0 : if (nHour < 10)
1723 0 : nCurOffset += 1 + 1;
1724 : else
1725 0 : nCurOffset += 2 + 1;
1726 0 : int nMin = atoi(pszMonthFound + nCurOffset);
1727 0 : nCurOffset += 2 + 1;
1728 0 : if (strncmp(pszMonthFound + nCurOffset, "p.m.", 4) == 0)
1729 0 : nHour += 12;
1730 0 : else if (strncmp(pszMonthFound + nCurOffset, "a.m.", 4) != 0)
1731 0 : nHour = -1;
1732 0 : nCurOffset += 4;
1733 :
1734 0 : const char* pszFilesize = pszMonthFound + nCurOffset;
1735 0 : while(*pszFilesize == ' ')
1736 0 : pszFilesize ++;
1737 0 : if (*pszFilesize >= '1' && *pszFilesize <= '9')
1738 0 : nFileSize = CPLScanUIntBig(pszFilesize, strlen(pszFilesize));
1739 :
1740 0 : if (nDay >= 1 && nDay <= 31 && nYear >= 1900 &&
1741 : nHour >= 0 && nHour <= 24 && nMin >= 0 && nMin < 60)
1742 : {
1743 0 : brokendowntime.tm_year = nYear - 1900;
1744 0 : brokendowntime.tm_mon = iMonth;
1745 0 : brokendowntime.tm_mday = nDay;
1746 0 : brokendowntime.tm_hour = nHour;
1747 0 : brokendowntime.tm_min = nMin;
1748 0 : mTime = CPLYMDHMSToUnixTime(&brokendowntime);
1749 :
1750 0 : return TRUE;
1751 : }
1752 0 : nFileSize = 0;
1753 : }
1754 0 : else if (pszMonthFound - pszStr > 1 &&
1755 0 : pszMonthFound[-1] == ',' &&
1756 : (int)strlen(pszMonthFound) > 1 + nLenMonth + 1 + 2 + 1 + 1 + 4 + 1 + 5 + 1 + 2)
1757 : {
1758 : /* Format of http://publicfiles.dep.state.fl.us/dear/BWR_GIS/2007NWFLULC/ */
1759 : /* " Sunday, June 20, 2010 6:46 PM 233170905 NWF2007LULCForSDE.zip" */
1760 0 : pszMonthFound += 1;
1761 0 : int nCurOffset = nLenMonth + 1;
1762 0 : int nDay = atoi(pszMonthFound + nCurOffset);
1763 0 : nCurOffset += 2 + 1 + 1;
1764 0 : int nYear = atoi(pszMonthFound + nCurOffset);
1765 0 : nCurOffset += 4 + 1;
1766 0 : int nHour = atoi(pszMonthFound + nCurOffset);
1767 0 : nCurOffset += 2 + 1;
1768 0 : int nMin = atoi(pszMonthFound + nCurOffset);
1769 0 : nCurOffset += 2 + 1;
1770 0 : if (strncmp(pszMonthFound + nCurOffset, "PM", 2) == 0)
1771 0 : nHour += 12;
1772 0 : else if (strncmp(pszMonthFound + nCurOffset, "AM", 2) != 0)
1773 0 : nHour = -1;
1774 0 : nCurOffset += 2;
1775 :
1776 0 : const char* pszFilesize = pszMonthFound + nCurOffset;
1777 0 : while(*pszFilesize == ' ')
1778 0 : pszFilesize ++;
1779 0 : if (*pszFilesize >= '1' && *pszFilesize <= '9')
1780 0 : nFileSize = CPLScanUIntBig(pszFilesize, strlen(pszFilesize));
1781 :
1782 0 : if (nDay >= 1 && nDay <= 31 && nYear >= 1900 &&
1783 : nHour >= 0 && nHour <= 24 && nMin >= 0 && nMin < 60)
1784 : {
1785 0 : brokendowntime.tm_year = nYear - 1900;
1786 0 : brokendowntime.tm_mon = iMonth;
1787 0 : brokendowntime.tm_mday = nDay;
1788 0 : brokendowntime.tm_hour = nHour;
1789 0 : brokendowntime.tm_min = nMin;
1790 0 : mTime = CPLYMDHMSToUnixTime(&brokendowntime);
1791 :
1792 0 : return TRUE;
1793 : }
1794 0 : nFileSize = 0;
1795 : }
1796 0 : return FALSE;
1797 : }
1798 : }
1799 :
1800 497 : return FALSE;
1801 : }
1802 :
1803 : /************************************************************************/
1804 : /* ParseHTMLFileList() */
1805 : /* */
1806 : /* Parse a file list document and return all the components. */
1807 : /************************************************************************/
1808 :
1809 14 : char** VSICurlFilesystemHandler::ParseHTMLFileList(const char* pszFilename,
1810 : char* pszData,
1811 : int* pbGotFileList)
1812 : {
1813 14 : CPLStringList oFileList;
1814 14 : char* pszLine = pszData;
1815 : char* c;
1816 14 : int nCount = 0;
1817 14 : int bIsHTMLDirList = FALSE;
1818 14 : CPLString osExpectedString;
1819 14 : CPLString osExpectedString2;
1820 14 : CPLString osExpectedString3;
1821 14 : CPLString osExpectedString4;
1822 14 : CPLString osExpectedString_unescaped;
1823 :
1824 14 : *pbGotFileList = FALSE;
1825 :
1826 : const char* pszDir;
1827 14 : if (EQUALN(pszFilename, "/vsicurl/http://", strlen("/vsicurl/http://")))
1828 14 : pszDir = strchr(pszFilename + strlen("/vsicurl/http://"), '/');
1829 0 : else if (EQUALN(pszFilename, "/vsicurl/https://", strlen("/vsicurl/https://")))
1830 0 : pszDir = strchr(pszFilename + strlen("/vsicurl/https://"), '/');
1831 : else
1832 0 : pszDir = strchr(pszFilename + strlen("/vsicurl/ftp://"), '/');
1833 14 : if (pszDir == NULL)
1834 0 : pszDir = "";
1835 : /* Apache */
1836 14 : osExpectedString = "<title>Index of ";
1837 14 : osExpectedString += pszDir;
1838 14 : osExpectedString += "</title>";
1839 : /* shttpd */
1840 14 : osExpectedString2 = "<title>Index of ";
1841 14 : osExpectedString2 += pszDir;
1842 14 : osExpectedString2 += "/</title>";
1843 : /* FTP */
1844 14 : osExpectedString3 = "FTP Listing of ";
1845 14 : osExpectedString3 += pszDir;
1846 14 : osExpectedString3 += "/";
1847 : /* Apache 1.3.33 */
1848 14 : osExpectedString4 = "<TITLE>Index of ";
1849 14 : osExpectedString4 += pszDir;
1850 14 : osExpectedString4 += "</TITLE>";
1851 :
1852 : /* The listing of http://dds.cr.usgs.gov/srtm/SRTM_image_sample/picture%20examples/ */
1853 : /* has "<title>Index of /srtm/SRTM_image_sample/picture examples</title>" so we must */
1854 : /* try unescaped %20 also */
1855 : /* Similar with http://datalib.usask.ca/gis/Data/Central_America_goodbutdoweown%3f/ */
1856 14 : if (strchr(pszDir, '%'))
1857 : {
1858 0 : char* pszUnescapedDir = CPLUnescapeString(pszDir, NULL, CPLES_URL);
1859 0 : osExpectedString_unescaped = "<title>Index of ";
1860 0 : osExpectedString_unescaped += pszUnescapedDir;
1861 0 : osExpectedString_unescaped += "</title>";
1862 0 : CPLFree(pszUnescapedDir);
1863 : }
1864 :
1865 14 : int nCountTable = 0;
1866 :
1867 629 : while( (c = VSICurlParserFindEOL( pszLine )) != NULL )
1868 : {
1869 601 : *c = 0;
1870 :
1871 : /* To avoid false positive on pages such as http://www.ngs.noaa.gov/PC_PROD/USGG2009BETA */
1872 : /* This is a heuristics, but normal HTML listing of files have not more than one table */
1873 601 : if (strstr(pszLine, "<table"))
1874 : {
1875 2 : nCountTable ++;
1876 2 : if (nCountTable == 2)
1877 : {
1878 0 : *pbGotFileList = FALSE;
1879 0 : return NULL;
1880 : }
1881 : }
1882 :
1883 601 : if (!bIsHTMLDirList &&
1884 : (strstr(pszLine, osExpectedString.c_str()) ||
1885 : strstr(pszLine, osExpectedString2.c_str()) ||
1886 : strstr(pszLine, osExpectedString3.c_str()) ||
1887 : strstr(pszLine, osExpectedString4.c_str()) ||
1888 : (osExpectedString_unescaped.size() != 0 && strstr(pszLine, osExpectedString_unescaped.c_str()))))
1889 : {
1890 2 : bIsHTMLDirList = TRUE;
1891 2 : *pbGotFileList = TRUE;
1892 : }
1893 : /* Subversion HTTP listing */
1894 : /* or Microsoft-IIS/6.0 listing (e.g. http://ortho.linz.govt.nz/tifs/2005_06/) */
1895 611 : else if (!bIsHTMLDirList && strstr(pszLine, "<title>"))
1896 : {
1897 : /* Detect something like : <html><head><title>gdal - Revision 20739: /trunk/autotest/gcore/data</title></head> */
1898 : /* The annoying thing is that what is after ': ' is a subpart of what is after http://server/ */
1899 12 : char* pszSubDir = strstr(pszLine, ": ");
1900 12 : if (pszSubDir == NULL)
1901 : /* or <title>ortho.linz.govt.nz - /tifs/2005_06/</title> */
1902 0 : pszSubDir = strstr(pszLine, "- ");
1903 12 : if (pszSubDir)
1904 : {
1905 12 : pszSubDir += 2;
1906 12 : char* pszTmp = strstr(pszSubDir, "</title>");
1907 12 : if (pszTmp)
1908 : {
1909 12 : if (pszTmp[-1] == '/')
1910 0 : pszTmp[-1] = 0;
1911 : else
1912 12 : *pszTmp = 0;
1913 12 : if (strstr(pszDir, pszSubDir))
1914 : {
1915 12 : bIsHTMLDirList = TRUE;
1916 12 : *pbGotFileList = TRUE;
1917 : }
1918 : }
1919 : }
1920 : }
1921 587 : else if (bIsHTMLDirList &&
1922 : (strstr(pszLine, "<a href=\"") != NULL || strstr(pszLine, "<A HREF=\"") != NULL) &&
1923 : strstr(pszLine, "<a href=\"http://") == NULL && /* exclude absolute links, like to subversion home */
1924 : strstr(pszLine, "Parent Directory") == NULL /* exclude parent directory */)
1925 : {
1926 505 : char *beginFilename = strstr(pszLine, "<a href=\"");
1927 505 : if (beginFilename == NULL)
1928 0 : beginFilename = strstr(pszLine, "<A HREF=\"");
1929 505 : beginFilename += strlen("<a href=\"");
1930 505 : char *endQuote = strchr(beginFilename, '"');
1931 505 : if (endQuote && strncmp(beginFilename, "?C=", 3) != 0 && strncmp(beginFilename, "?N=", 3) != 0)
1932 : {
1933 : struct tm brokendowntime;
1934 503 : memset(&brokendowntime, 0, sizeof(brokendowntime));
1935 503 : GUIntBig nFileSize = 0;
1936 503 : GIntBig mTime = 0;
1937 :
1938 : VSICurlParseHTMLDateTimeFileSize(pszLine,
1939 : brokendowntime,
1940 : nFileSize,
1941 503 : mTime);
1942 :
1943 503 : *endQuote = '\0';
1944 :
1945 : /* Remove trailing slash, that are returned for directories by */
1946 : /* Apache */
1947 503 : int bIsDirectory = FALSE;
1948 503 : if (endQuote[-1] == '/')
1949 : {
1950 22 : bIsDirectory = TRUE;
1951 22 : endQuote[-1] = 0;
1952 : }
1953 :
1954 : /* shttpd links include slashes from the root directory. Skip them */
1955 1006 : while(strchr(beginFilename, '/'))
1956 0 : beginFilename = strchr(beginFilename, '/') + 1;
1957 :
1958 503 : if (strcmp(beginFilename, ".") != 0 &&
1959 : strcmp(beginFilename, "..") != 0)
1960 : {
1961 : CPLString osCachedFilename =
1962 491 : CPLSPrintf("%s/%s", pszFilename + strlen("/vsicurl/"), beginFilename);
1963 491 : CachedFileProp* cachedFileProp = GetCachedFileProp(osCachedFilename);
1964 491 : cachedFileProp->eExists = EXIST_YES;
1965 491 : cachedFileProp->bIsDirectory = bIsDirectory;
1966 491 : cachedFileProp->mTime = mTime;
1967 491 : cachedFileProp->bHastComputedFileSize = nFileSize > 0;
1968 491 : cachedFileProp->fileSize = nFileSize;
1969 :
1970 491 : oFileList.AddString( beginFilename );
1971 : if (ENABLE_DEBUG)
1972 : CPLDebug("VSICURL", "File[%d] = %s, is_dir = %d, size = " CPL_FRMT_GUIB ", time = %04d/%02d/%02d %02d:%02d:%02d",
1973 : nCount, beginFilename, bIsDirectory, nFileSize,
1974 : brokendowntime.tm_year + 1900, brokendowntime.tm_mon + 1, brokendowntime.tm_mday,
1975 491 : brokendowntime.tm_hour, brokendowntime.tm_min, brokendowntime.tm_sec);
1976 491 : nCount ++;
1977 : }
1978 : }
1979 : }
1980 601 : pszLine = c + 1;
1981 : }
1982 :
1983 14 : return oFileList.StealList();
1984 : }
1985 :
1986 :
/************************************************************************/
/*                          VSICurlGetToken()                           */
/************************************************************************/

/* Extract the next space-delimited token of pszCurPtr.                   */
/*                                                                        */
/* The buffer is modified in place: the space that follows the token is   */
/* replaced by a nul character. Returns the start of the token, or NULL   */
/* when pszCurPtr is NULL or holds nothing but spaces (in which case      */
/* *ppszNextToken is left untouched). On success *ppszNextToken is set to */
/* the first non-space character after the token, or to NULL when the     */
/* token runs up to the end of the string.                                */

static char* VSICurlGetToken(char* pszCurPtr, char** ppszNextToken)
{
    if (pszCurPtr == NULL)
        return NULL;

    /* Skip the leading blanks */
    char* const pszTokenStart = pszCurPtr + strspn(pszCurPtr, " ");
    if (pszTokenStart[0] == '\0')
        return NULL;

    /* The token extends up to the next blank or the end of the string */
    char* const pszTokenEnd = pszTokenStart + strcspn(pszTokenStart, " ");
    if (pszTokenEnd[0] == '\0')
    {
        *ppszNextToken = NULL;
        return pszTokenStart;
    }

    /* Terminate the token and position the caller on what follows */
    pszTokenEnd[0] = '\0';
    char* const pszRemainder = pszTokenEnd + 1;
    *ppszNextToken = pszRemainder + strspn(pszRemainder, " ");

    return pszTokenStart;
}
2017 :
2018 : /************************************************************************/
2019 : /* VSICurlParseFullFTPLine() */
2020 : /************************************************************************/
2021 :
2022 : /* Parse lines like the following ones :
2023 : -rw-r--r-- 1 10003 100 430 Jul 04 2008 COPYING
2024 : lrwxrwxrwx 1 ftp ftp 28 Jun 14 14:13 MPlayer -> mirrors/mplayerhq.hu/MPlayer
2025 : -rw-r--r-- 1 ftp ftp 725614592 May 13 20:13 Fedora-15-x86_64-Live-KDE.iso
2026 : drwxr-xr-x 280 1003 1003 6656 Aug 26 04:17 gnu
2027 : */
2028 :
2029 2 : static int VSICurlParseFullFTPLine(char* pszLine,
2030 : char*& pszFilename,
2031 : int& bSizeValid,
2032 : GUIntBig& nSize,
2033 : int& bIsDirectory,
2034 : GIntBig& nUnixTime)
2035 : {
2036 2 : char* pszNextToken = pszLine;
2037 2 : char* pszPermissions = VSICurlGetToken(pszNextToken, &pszNextToken);
2038 2 : if (pszPermissions == NULL || strlen(pszPermissions) != 10)
2039 0 : return FALSE;
2040 2 : bIsDirectory = (pszPermissions[0] == 'd');
2041 :
2042 : int i;
2043 8 : for(i = 0; i < 3; i++)
2044 : {
2045 6 : if (VSICurlGetToken(pszNextToken, &pszNextToken) == NULL)
2046 0 : return FALSE;
2047 : }
2048 :
2049 2 : char* pszSize = VSICurlGetToken(pszNextToken, &pszNextToken);
2050 2 : if (pszSize == NULL)
2051 0 : return FALSE;
2052 :
2053 2 : if (pszPermissions[0] == '-')
2054 : {
2055 : /* Regular file */
2056 2 : bSizeValid = TRUE;
2057 2 : nSize = CPLScanUIntBig(pszSize, strlen(pszSize));
2058 : }
2059 :
2060 : struct tm brokendowntime;
2061 2 : memset(&brokendowntime, 0, sizeof(brokendowntime));
2062 2 : int bBrokenDownTimeValid = TRUE;
2063 :
2064 2 : char* pszMonth = VSICurlGetToken(pszNextToken, &pszNextToken);
2065 2 : if (pszMonth == NULL || strlen(pszMonth) != 3)
2066 0 : return FALSE;
2067 :
2068 20 : for(i = 0; i < 12; i++)
2069 : {
2070 20 : if (EQUALN(pszMonth, apszMonths[i], 3))
2071 2 : break;
2072 : }
2073 2 : if (i < 12)
2074 2 : brokendowntime.tm_mon = i;
2075 : else
2076 0 : bBrokenDownTimeValid = FALSE;
2077 :
2078 2 : char* pszDay = VSICurlGetToken(pszNextToken, &pszNextToken);
2079 2 : if (pszDay == NULL || (strlen(pszDay) != 1 && strlen(pszDay) != 2))
2080 0 : return FALSE;
2081 2 : int nDay = atoi(pszDay);
2082 4 : if (nDay >= 1 && nDay <= 31)
2083 2 : brokendowntime.tm_mday = nDay;
2084 : else
2085 0 : bBrokenDownTimeValid = FALSE;
2086 :
2087 2 : char* pszHourOrYear = VSICurlGetToken(pszNextToken, &pszNextToken);
2088 2 : if (pszHourOrYear == NULL || (strlen(pszHourOrYear) != 4 && strlen(pszHourOrYear) != 5))
2089 0 : return FALSE;
2090 2 : if (strlen(pszHourOrYear) == 4)
2091 : {
2092 2 : brokendowntime.tm_year = atoi(pszHourOrYear) - 1900;
2093 : }
2094 : else
2095 : {
2096 : time_t sTime;
2097 0 : time(&sTime);
2098 : struct tm currentBrokendowntime;
2099 0 : CPLUnixTimeToYMDHMS((GIntBig)sTime, ¤tBrokendowntime);
2100 0 : brokendowntime.tm_year = currentBrokendowntime.tm_year;
2101 0 : brokendowntime.tm_hour = atoi(pszHourOrYear);
2102 0 : brokendowntime.tm_min = atoi(pszHourOrYear + 3);
2103 : }
2104 :
2105 2 : if (bBrokenDownTimeValid)
2106 2 : nUnixTime = CPLYMDHMSToUnixTime(&brokendowntime);
2107 : else
2108 0 : nUnixTime = 0;
2109 :
2110 2 : if (pszNextToken == NULL)
2111 0 : return FALSE;
2112 :
2113 2 : pszFilename = pszNextToken;
2114 :
2115 2 : char* pszCurPtr = pszFilename;
2116 33 : while( *pszCurPtr != '\0')
2117 : {
2118 : /* In case of a link, stop before the pointed part of the link */
2119 29 : if (pszPermissions[0] == 'l' && strncmp(pszCurPtr, " -> ", 4) == 0)
2120 : {
2121 0 : break;
2122 : }
2123 29 : pszCurPtr ++;
2124 : }
2125 2 : *pszCurPtr = '\0';
2126 :
2127 2 : return TRUE;
2128 : }
2129 :
2130 : /************************************************************************/
2131 : /* GetFileList() */
2132 : /************************************************************************/
2133 :
2134 15 : char** VSICurlFilesystemHandler::GetFileList(const char *pszDirname, int* pbGotFileList)
2135 : {
2136 : if (ENABLE_DEBUG)
2137 15 : CPLDebug("VSICURL", "GetFileList(%s)" , pszDirname);
2138 :
2139 15 : *pbGotFileList = FALSE;
2140 :
2141 15 : if (strncmp(pszDirname, "/vsicurl/ftp", strlen("/vsicurl/ftp")) == 0)
2142 : {
2143 : WriteFuncStruct sWriteFuncData;
2144 1 : sWriteFuncData.pBuffer = NULL;
2145 :
2146 1 : CPLString osDirname(pszDirname + strlen("/vsicurl/"));
2147 1 : osDirname += '/';
2148 :
2149 1 : char** papszFileList = NULL;
2150 :
2151 1 : for(int iTry=0;iTry<2;iTry++)
2152 : {
2153 1 : CURL* hCurlHandle = GetCurlHandleFor(osDirname);
2154 1 : VSICurlSetOptions(hCurlHandle, osDirname.c_str());
2155 :
2156 : /* On the first pass, we want to try fetching all the possible */
2157 : /* informations (filename, file/directory, size). If that */
2158 : /* does not work, then try again with CURLOPT_DIRLISTONLY set */
2159 1 : if (iTry == 1)
2160 : {
2161 : /* 7.16.4 */
2162 : #if LIBCURL_VERSION_NUM <= 0x071004
2163 : curl_easy_setopt(hCurlHandle, CURLOPT_FTPLISTONLY, 1);
2164 : #elif LIBCURL_VERSION_NUM > 0x071004
2165 0 : curl_easy_setopt(hCurlHandle, CURLOPT_DIRLISTONLY, 1);
2166 : #endif
2167 : }
2168 :
2169 1 : VSICURLInitWriteFuncStruct(&sWriteFuncData);
2170 1 : curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData);
2171 1 : curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION, VSICurlHandleWriteFunc);
2172 :
2173 : char szCurlErrBuf[CURL_ERROR_SIZE+1];
2174 1 : szCurlErrBuf[0] = '\0';
2175 1 : curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER, szCurlErrBuf );
2176 :
2177 1 : curl_easy_perform(hCurlHandle);
2178 :
2179 1 : if (sWriteFuncData.pBuffer == NULL)
2180 0 : return NULL;
2181 :
2182 1 : char* pszLine = sWriteFuncData.pBuffer;
2183 : char* c;
2184 1 : int nCount = 0;
2185 :
2186 1 : if (EQUALN(pszLine, "<!DOCTYPE HTML", strlen("<!DOCTYPE HTML")) ||
2187 : EQUALN(pszLine, "<HTML>", 6))
2188 : {
2189 : papszFileList = ParseHTMLFileList(pszDirname,
2190 : sWriteFuncData.pBuffer,
2191 0 : pbGotFileList);
2192 0 : break;
2193 : }
2194 1 : else if (iTry == 0)
2195 : {
2196 1 : CPLStringList oFileList;
2197 1 : *pbGotFileList = TRUE;
2198 :
2199 4 : while( (c = strchr(pszLine, '\n')) != NULL)
2200 : {
2201 2 : *c = 0;
2202 2 : if (c - pszLine > 0 && c[-1] == '\r')
2203 0 : c[-1] = 0;
2204 :
2205 2 : char* pszFilename = NULL;
2206 2 : int bSizeValid = FALSE;
2207 2 : GUIntBig nFileSize = 0;
2208 2 : int bIsDirectory = FALSE;
2209 2 : GIntBig mUnixTime = 0;
2210 2 : if (!VSICurlParseFullFTPLine(pszLine, pszFilename,
2211 : bSizeValid, nFileSize,
2212 : bIsDirectory, mUnixTime))
2213 0 : break;
2214 :
2215 2 : if (strcmp(pszFilename, ".") != 0 &&
2216 : strcmp(pszFilename, "..") != 0)
2217 : {
2218 : CPLString osCachedFilename =
2219 2 : CPLSPrintf("%s/%s", pszDirname + strlen("/vsicurl/"), pszFilename);
2220 2 : CachedFileProp* cachedFileProp = GetCachedFileProp(osCachedFilename);
2221 2 : cachedFileProp->eExists = EXIST_YES;
2222 2 : cachedFileProp->bHastComputedFileSize = bSizeValid;
2223 2 : cachedFileProp->fileSize = nFileSize;
2224 2 : cachedFileProp->bIsDirectory = bIsDirectory;
2225 2 : cachedFileProp->mTime = mUnixTime;
2226 :
2227 2 : oFileList.AddString(pszFilename);
2228 : if (ENABLE_DEBUG)
2229 : {
2230 : struct tm brokendowntime;
2231 2 : CPLUnixTimeToYMDHMS(mUnixTime, &brokendowntime);
2232 : CPLDebug("VSICURL", "File[%d] = %s, is_dir = %d, size = " CPL_FRMT_GUIB ", time = %04d/%02d/%02d %02d:%02d:%02d",
2233 : nCount, pszFilename, bIsDirectory, nFileSize,
2234 : brokendowntime.tm_year + 1900, brokendowntime.tm_mon + 1, brokendowntime.tm_mday,
2235 2 : brokendowntime.tm_hour, brokendowntime.tm_min, brokendowntime.tm_sec);
2236 : }
2237 :
2238 2 : nCount ++;
2239 : }
2240 :
2241 2 : pszLine = c + 1;
2242 : }
2243 :
2244 1 : if (c == NULL)
2245 : {
2246 1 : papszFileList = oFileList.StealList();
2247 : break;
2248 0 : }
2249 : }
2250 : else
2251 : {
2252 0 : CPLStringList oFileList;
2253 0 : *pbGotFileList = TRUE;
2254 :
2255 0 : while( (c = strchr(pszLine, '\n')) != NULL)
2256 : {
2257 0 : *c = 0;
2258 0 : if (c - pszLine > 0 && c[-1] == '\r')
2259 0 : c[-1] = 0;
2260 :
2261 0 : if (strcmp(pszLine, ".") != 0 &&
2262 : strcmp(pszLine, "..") != 0)
2263 : {
2264 0 : oFileList.AddString(pszLine);
2265 : if (ENABLE_DEBUG)
2266 0 : CPLDebug("VSICURL", "File[%d] = %s", nCount, pszLine);
2267 0 : nCount ++;
2268 : }
2269 :
2270 0 : pszLine = c + 1;
2271 : }
2272 :
2273 0 : papszFileList = oFileList.StealList();
2274 : }
2275 :
2276 0 : CPLFree(sWriteFuncData.pBuffer);
2277 0 : sWriteFuncData.pBuffer = NULL;
2278 : }
2279 :
2280 1 : CPLFree(sWriteFuncData.pBuffer);
2281 :
2282 1 : return papszFileList;
2283 : }
2284 :
2285 : /* Try to recognize HTML pages that list the content of a directory */
2286 : /* Currently this supports what Apache and shttpd can return */
2287 14 : else if (strncmp(pszDirname, "/vsicurl/http://", strlen("/vsicurl/http://")) == 0 ||
2288 : strncmp(pszDirname, "/vsicurl/https://", strlen("/vsicurl/https://")) == 0)
2289 : {
2290 : WriteFuncStruct sWriteFuncData;
2291 :
2292 14 : CPLString osDirname(pszDirname + strlen("/vsicurl/"));
2293 14 : osDirname += '/';
2294 :
2295 : #if LIBCURL_VERSION_NUM < 0x070B00
2296 : /* Curl 7.10.X doesn't manage to unset the CURLOPT_RANGE that would have been */
2297 : /* previously set, so we have to reinit the connection handle */
2298 : GetCurlHandleFor("");
2299 : #endif
2300 :
2301 14 : CURL* hCurlHandle = GetCurlHandleFor(osDirname);
2302 14 : VSICurlSetOptions(hCurlHandle, osDirname.c_str());
2303 :
2304 14 : curl_easy_setopt(hCurlHandle, CURLOPT_RANGE, NULL);
2305 :
2306 14 : VSICURLInitWriteFuncStruct(&sWriteFuncData);
2307 14 : curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData);
2308 14 : curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION, VSICurlHandleWriteFunc);
2309 :
2310 : char szCurlErrBuf[CURL_ERROR_SIZE+1];
2311 14 : szCurlErrBuf[0] = '\0';
2312 14 : curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER, szCurlErrBuf );
2313 :
2314 14 : curl_easy_perform(hCurlHandle);
2315 :
2316 14 : if (sWriteFuncData.pBuffer == NULL)
2317 0 : return NULL;
2318 :
2319 : char** papszFileList = ParseHTMLFileList(pszDirname,
2320 : sWriteFuncData.pBuffer,
2321 14 : pbGotFileList);
2322 :
2323 14 : CPLFree(sWriteFuncData.pBuffer);
2324 14 : return papszFileList;
2325 : }
2326 :
2327 0 : return NULL;
2328 : }
2329 :
2330 : /************************************************************************/
2331 : /* Stat() */
2332 : /************************************************************************/
2333 :
2334 659 : int VSICurlFilesystemHandler::Stat( const char *pszFilename, VSIStatBufL *pStatBuf,
2335 : int nFlags )
2336 : {
2337 659 : CPLString osFilename(pszFilename);
2338 :
2339 659 : memset(pStatBuf, 0, sizeof(VSIStatBufL));
2340 :
2341 : const char* pszOptionVal =
2342 659 : CPLGetConfigOption( "GDAL_DISABLE_READDIR_ON_OPEN", "NO" );
2343 : int bSkipReadDir = EQUAL(pszOptionVal, "EMPTY_DIR") ||
2344 659 : CSLTestBoolean(pszOptionVal);
2345 :
2346 : /* Does it look like a FTP directory ? */
2347 661 : if (strncmp(osFilename, "/vsicurl/ftp", strlen("/vsicurl/ftp")) == 0 &&
2348 2 : pszFilename[strlen(osFilename) - 1] == '/' && !bSkipReadDir)
2349 : {
2350 0 : char** papszFileList = ReadDir(osFilename);
2351 0 : if (papszFileList)
2352 : {
2353 0 : pStatBuf->st_mode = S_IFDIR;
2354 0 : pStatBuf->st_size = 0;
2355 :
2356 0 : CSLDestroy(papszFileList);
2357 :
2358 0 : return 0;
2359 : }
2360 0 : return -1;
2361 : }
2362 659 : else if (strchr(CPLGetFilename(osFilename), '.') != NULL &&
2363 : strncmp(CPLGetExtension(osFilename), "zip", 3) != 0 &&
2364 : !bSkipReadDir)
2365 : {
2366 : int bGotFileList;
2367 605 : char** papszFileList = ReadDir(CPLGetDirname(osFilename), &bGotFileList);
2368 605 : int bFound = (VSICurlIsFileInList(papszFileList, CPLGetFilename(osFilename)) != -1);
2369 605 : CSLDestroy(papszFileList);
2370 605 : if (bGotFileList && !bFound)
2371 : {
2372 17 : return -1;
2373 : }
2374 : }
2375 :
2376 642 : VSICurlHandle oHandle( this, osFilename + strlen("/vsicurl/"));
2377 :
2378 642 : if ( oHandle.IsKnownFileSize() ||
2379 : ((nFlags & VSI_STAT_SIZE_FLAG) && !oHandle.IsDirectory() &&
2380 : CSLTestBoolean(CPLGetConfigOption("CPL_VSIL_CURL_SLOW_GET_SIZE", "YES"))) )
2381 10 : pStatBuf->st_size = oHandle.GetFileSize();
2382 :
2383 642 : int nRet = (oHandle.Exists()) ? 0 : -1;
2384 642 : pStatBuf->st_mtime = oHandle.GetMTime();
2385 642 : pStatBuf->st_mode = oHandle.IsDirectory() ? S_IFDIR : S_IFREG;
2386 642 : return nRet;
2387 : }
2388 :
2389 : /************************************************************************/
2390 : /* Unlink() */
2391 : /************************************************************************/
2392 :
2393 0 : int VSICurlFilesystemHandler::Unlink( const char *pszFilename )
2394 : {
2395 0 : return -1;
2396 : }
2397 :
2398 : /************************************************************************/
2399 : /* Rename() */
2400 : /************************************************************************/
2401 :
2402 0 : int VSICurlFilesystemHandler::Rename( const char *oldpath, const char *newpath )
2403 : {
2404 0 : return -1;
2405 : }
2406 :
2407 : /************************************************************************/
2408 : /* Mkdir() */
2409 : /************************************************************************/
2410 :
2411 0 : int VSICurlFilesystemHandler::Mkdir( const char *pszDirname, long nMode )
2412 : {
2413 0 : return -1;
2414 : }
2415 : /************************************************************************/
2416 : /* Rmdir() */
2417 : /************************************************************************/
2418 :
2419 0 : int VSICurlFilesystemHandler::Rmdir( const char *pszDirname )
2420 : {
2421 0 : return -1;
2422 : }
2423 :
2424 : /************************************************************************/
2425 : /* ReadDir() */
2426 : /************************************************************************/
2427 :
2428 1260 : char** VSICurlFilesystemHandler::ReadDir( const char *pszDirname, int* pbGotFileList )
2429 : {
2430 1260 : CPLString osDirname(pszDirname);
2431 2520 : while (osDirname[strlen(osDirname) - 1] == '/')
2432 0 : osDirname.erase(strlen(osDirname) - 1);
2433 :
2434 1260 : const char* pszUpDir = strstr(osDirname, "/..");
2435 1260 : if (pszUpDir != NULL)
2436 : {
2437 0 : int pos = pszUpDir - osDirname.c_str() - 1;
2438 0 : while(pos >= 0 && osDirname[pos] != '/')
2439 0 : pos --;
2440 0 : if (pos >= 1)
2441 : {
2442 0 : osDirname = osDirname.substr(0, pos) + CPLString(pszUpDir + 3);
2443 : }
2444 : }
2445 :
2446 1260 : CPLMutexHolder oHolder( &hMutex );
2447 :
2448 : /* If we know the file exists and is not a directory, then don't try to list its content */
2449 1260 : CachedFileProp* cachedFileProp = GetCachedFileProp(osDirname.c_str() + strlen("/vsicurl/"));
2450 1260 : if (cachedFileProp->eExists == EXIST_YES && !cachedFileProp->bIsDirectory)
2451 : {
2452 600 : if (pbGotFileList)
2453 0 : *pbGotFileList = TRUE;
2454 600 : return NULL;
2455 : }
2456 :
2457 660 : CachedDirList* psCachedDirList = cacheDirList[osDirname];
2458 660 : if (psCachedDirList == NULL)
2459 : {
2460 15 : psCachedDirList = (CachedDirList*) CPLMalloc(sizeof(CachedDirList));
2461 15 : psCachedDirList->papszFileList = GetFileList(osDirname, &psCachedDirList->bGotFileList);
2462 15 : cacheDirList[osDirname] = psCachedDirList;
2463 : }
2464 :
2465 660 : if (pbGotFileList)
2466 634 : *pbGotFileList = psCachedDirList->bGotFileList;
2467 :
2468 660 : return CSLDuplicate(psCachedDirList->papszFileList);
2469 : }
2470 :
2471 : /************************************************************************/
2472 : /* ReadDir() */
2473 : /************************************************************************/
2474 :
2475 626 : char** VSICurlFilesystemHandler::ReadDir( const char *pszDirname )
2476 : {
2477 626 : return ReadDir(pszDirname, NULL);
2478 : }
2479 :
2480 : /************************************************************************/
2481 : /* VSIInstallCurlFileHandler() */
2482 : /************************************************************************/
2483 :
/**
 * \brief Install /vsicurl/ HTTP/FTP file system handler (requires libcurl)
 *
 * A special file handler is installed that allows reading on-the-fly of files
 * available through HTTP/FTP web protocols, without downloading the entire file.
 *
 * Recognized filenames are of the form /vsicurl/http://path/to/remote/resource or
 * /vsicurl/ftp://path/to/remote/resource where path/to/remote/resource is the
 * URL of a remote resource.
 *
 * Partial downloads (requires the HTTP server to support random reading) are done
 * with a 16 KB granularity by default. If the driver detects sequential reading
 * it will progressively increase the chunk size up to 2 MB to improve download
 * performance.
 *
 * The GDAL_HTTP_PROXY and GDAL_HTTP_PROXYUSERPWD configuration options can be
 * used to define a proxy server. The syntax to use is the one of Curl CURLOPT_PROXY
 * and CURLOPT_PROXYUSERPWD options.
 *
 * VSIStatL() will return the size in the st_size member and the nature of the
 * resource (file or directory) in the st_mode member (the latter is only reliable
 * with FTP resources for now).
 *
 * VSIReadDir() should be able to parse the HTML directory listing returned by the
 * most popular web servers, such as Apache or Microsoft IIS.
 *
 * This special file handler can be combined with other virtual filesystems handlers,
 * such as /vsizip. For example, /vsizip//vsicurl/path/to/remote/file.zip/path/inside/zip
 *
 * @since GDAL 1.8.0
 */
2515 647 : void VSIInstallCurlFileHandler(void)
2516 : {
2517 647 : VSIFileManager::InstallHandler( "/vsicurl/", new VSICurlFilesystemHandler );
2518 647 : }
2519 :
2520 :
2521 : #endif /* HAVE_CURL */
|