1 : /******************************************************************************
2 : * $Id: cpl_vsil_curl.cpp 25311 2012-12-15 12:48:14Z rouault $
3 : *
4 : * Project: CPL - Common Portability Library
5 : * Purpose: Implement VSI large file api for HTTP/FTP files
6 : * Author: Even Rouault, even.rouault at mines-paris.org
7 : *
8 : ******************************************************************************
9 : * Copyright (c) 2008, Even Rouault
10 : *
11 : * Permission is hereby granted, free of charge, to any person obtaining a
12 : * copy of this software and associated documentation files (the "Software"),
13 : * to deal in the Software without restriction, including without limitation
14 : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15 : * and/or sell copies of the Software, and to permit persons to whom the
16 : * Software is furnished to do so, subject to the following conditions:
17 : *
18 : * The above copyright notice and this permission notice shall be included
19 : * in all copies or substantial portions of the Software.
20 : *
21 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
22 : * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
24 : * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25 : * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26 : * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27 : * DEALINGS IN THE SOFTWARE.
28 : ****************************************************************************/
29 :
30 : #include "cpl_vsi_virtual.h"
31 : #include "cpl_string.h"
32 : #include "cpl_multiproc.h"
33 : #include "cpl_hash_set.h"
34 : #include "cpl_time.h"
35 : #include "cpl_vsil_curl_priv.h"
36 :
37 : CPL_CVSID("$Id: cpl_vsil_curl.cpp 25311 2012-12-15 12:48:14Z rouault $");
38 :
39 : #ifndef HAVE_CURL
40 :
/**
 * Stub compiled when HAVE_CURL is not defined: does nothing.
 */
void VSIInstallCurlFileHandler()
{
    /* libcurl support is not compiled in: nothing to install */
}
45 :
46 : /************************************************************************/
47 : /* VSICurlInstallReadCbk() */
48 : /************************************************************************/
49 :
50 : int VSICurlInstallReadCbk (VSILFILE* fp,
51 : VSICurlReadCbkFunc pfnReadCbk,
52 : void* pfnUserData,
53 : int bStopOnInterrruptUntilUninstall)
54 : {
55 : return FALSE;
56 : }
57 :
58 :
59 : /************************************************************************/
60 : /* VSICurlUninstallReadCbk() */
61 : /************************************************************************/
62 :
63 : int VSICurlUninstallReadCbk(VSILFILE* fp)
64 : {
65 : return FALSE;
66 : }
67 :
68 : #else
69 :
70 : #include <curl/curl.h>
71 :
72 : #include <map>
73 :
/* Compile-time switch: the CPLDebug() traces of this file are guarded by */
/* "if (ENABLE_DEBUG)" */
#define ENABLE_DEBUG 1

/* NOTE(review): presumably the maximum number of regions kept in the */
/* in-memory cache; its use is outside this part of the file - confirm */
#define N_MAX_REGIONS 1000

/* Size in bytes of one download block: range requests cover a whole number */
/* of blocks and downloaded data is cached in slices of at most this size */
#define DOWNLOAD_CHUNCK_SIZE 16384
79 :
/* Tri-state describing whether a remote resource is known to exist */
enum ExistStatus
{
    EXIST_UNKNOWN = -1,   /* not determined yet */
    EXIST_NO      = 0,    /* known not to exist */
    EXIST_YES     = 1     /* known to exist */
};
86 :
87 : typedef struct
88 : {
89 : ExistStatus eExists;
90 : int bHastComputedFileSize;
91 : vsi_l_offset fileSize;
92 : int bIsDirectory;
93 : time_t mTime;
94 : } CachedFileProp;
95 :
/* Cached listing of a remote directory */
struct CachedDirList
{
    int     bGotFileList;    /* presumably TRUE when the listing could be retrieved - confirm */
    char  **papszFileList;   /* only file name without path */
};
101 :
102 : typedef struct
103 : {
104 : unsigned long pszURLHash;
105 : vsi_l_offset nFileOffsetStart;
106 : size_t nSize;
107 : char *pData;
108 : } CachedRegion;
109 :
110 :
/**
 * Return the constant file name "gdal_vsicurl_cache.bin".
 *
 * NOTE(review): presumably the name of the on-disk region cache; the callers
 * are outside this part of the file.
 *
 * @return pointer to a string literal (must not be freed).
 */
static const char* VSICurlGetCacheFileName()
{
    const char* const pszCacheFileName = "gdal_vsicurl_cache.bin";
    return pszCacheFileName;
}
115 :
116 : /************************************************************************/
117 : /* VSICurlFindStringSensitiveExceptEscapeSequences() */
118 : /************************************************************************/
119 :
120 844 : static int VSICurlFindStringSensitiveExceptEscapeSequences( char ** papszList,
121 : const char * pszTarget )
122 :
123 : {
124 : int i;
125 :
126 844 : if( papszList == NULL )
127 60 : return -1;
128 :
129 73289 : for( i = 0; papszList[i] != NULL; i++ )
130 : {
131 73263 : const char* pszIter1 = papszList[i];
132 73263 : const char* pszIter2 = pszTarget;
133 : char ch1, ch2;
134 : /* The comparison is case-sensitive, escape for escaped */
135 : /* sequences where letters of the hexadecimal sequence */
136 : /* can be uppercase or lowercase depending on the quoting algorithm */
137 40811 : while(TRUE)
138 : {
139 114074 : ch1 = *pszIter1;
140 114074 : ch2 = *pszIter2;
141 114074 : if (ch1 == '\0' || ch2 == '\0')
142 767 : break;
143 113307 : if (ch1 == '%' && ch2 == '%' &&
144 0 : pszIter1[1] != '\0' && pszIter1[2] != '\0' &&
145 0 : pszIter2[1] != '\0' && pszIter2[2] != '\0')
146 : {
147 0 : if (!EQUALN(pszIter1+1, pszIter2+1, 2))
148 0 : break;
149 0 : pszIter1 += 2;
150 0 : pszIter2 += 2;
151 : }
152 113307 : if (ch1 != ch2)
153 72496 : break;
154 40811 : pszIter1 ++;
155 40811 : pszIter2 ++;
156 : }
157 73263 : if (ch1 == ch2 && ch1 == '\0')
158 758 : return i;
159 : }
160 :
161 26 : return -1;
162 : }
163 :
164 : /************************************************************************/
165 : /* VSICurlIsFileInList() */
166 : /************************************************************************/
167 :
168 814 : static int VSICurlIsFileInList( char ** papszList, const char * pszTarget )
169 : {
170 814 : int nRet = VSICurlFindStringSensitiveExceptEscapeSequences(papszList, pszTarget);
171 814 : if (nRet >= 0)
172 758 : return nRet;
173 :
174 : /* If we didn't find anything, try to URL-escape the target filename */
175 56 : char* pszEscaped = CPLEscapeString(pszTarget, -1, CPLES_URL);
176 56 : if (strcmp(pszTarget, pszEscaped) != 0)
177 : {
178 30 : nRet = VSICurlFindStringSensitiveExceptEscapeSequences(papszList, pszEscaped);
179 : }
180 56 : CPLFree(pszEscaped);
181 56 : return nRet;
182 : }
183 :
184 : /************************************************************************/
185 : /* VSICurlFilesystemHandler */
186 : /************************************************************************/
187 :
188 : typedef struct
189 : {
190 : CPLString osURL;
191 : CURL *hCurlHandle;
192 4 : } CachedConnection;
193 :
194 :
195 : class VSICurlFilesystemHandler : public VSIFilesystemHandler
196 : {
197 : void *hMutex;
198 :
199 : CachedRegion **papsRegions;
200 : int nRegions;
201 :
202 : std::map<CPLString, CachedFileProp*> cacheFileSize;
203 : std::map<CPLString, CachedDirList*> cacheDirList;
204 :
205 : int bUseCacheDisk;
206 :
207 : /* Per-thread Curl connection cache */
208 : std::map<GIntBig, CachedConnection*> mapConnections;
209 :
210 : char** GetFileList(const char *pszFilename, int* pbGotFileList);
211 :
212 : char** ParseHTMLFileList(const char* pszFilename,
213 : char* pszData,
214 : int* pbGotFileList);
215 : public:
216 : VSICurlFilesystemHandler();
217 : ~VSICurlFilesystemHandler();
218 :
219 : virtual VSIVirtualHandle *Open( const char *pszFilename,
220 : const char *pszAccess);
221 : virtual int Stat( const char *pszFilename, VSIStatBufL *pStatBuf, int nFlags );
222 : virtual int Unlink( const char *pszFilename );
223 : virtual int Rename( const char *oldpath, const char *newpath );
224 : virtual int Mkdir( const char *pszDirname, long nMode );
225 : virtual int Rmdir( const char *pszDirname );
226 : virtual char **ReadDir( const char *pszDirname );
227 : virtual char **ReadDir( const char *pszDirname, int* pbGotFileList );
228 :
229 :
230 : const CachedRegion* GetRegion(const char* pszURL,
231 : vsi_l_offset nFileOffsetStart);
232 :
233 : void AddRegion(const char* pszURL,
234 : vsi_l_offset nFileOffsetStart,
235 : size_t nSize,
236 : const char *pData);
237 :
238 : CachedFileProp* GetCachedFileProp(const char* pszURL);
239 :
240 : void AddRegionToCacheDisk(CachedRegion* psRegion);
241 : const CachedRegion* GetRegionFromCacheDisk(const char* pszURL,
242 : vsi_l_offset nFileOffsetStart);
243 :
244 : CURL *GetCurlHandleFor(CPLString osURL);
245 : };
246 :
247 : /************************************************************************/
248 : /* VSICurlHandle */
249 : /************************************************************************/
250 :
251 : class VSICurlHandle : public VSIVirtualHandle
252 : {
253 : private:
254 : VSICurlFilesystemHandler* poFS;
255 :
256 : char* pszURL;
257 :
258 : vsi_l_offset curOffset;
259 : vsi_l_offset fileSize;
260 : int bHastComputedFileSize;
261 : ExistStatus eExists;
262 : int bIsDirectory;
263 : time_t mTime;
264 :
265 : vsi_l_offset lastDownloadedOffset;
266 : int nBlocksToDownload;
267 : int bEOF;
268 :
269 : int DownloadRegion(vsi_l_offset startOffset, int nBlocks);
270 :
271 : VSICurlReadCbkFunc pfnReadCbk;
272 : void *pReadCbkUserData;
273 : int bStopOnInterrruptUntilUninstall;
274 : int bInterrupted;
275 :
276 : public:
277 :
278 : VSICurlHandle(VSICurlFilesystemHandler* poFS, const char* pszURL);
279 : ~VSICurlHandle();
280 :
281 : virtual int Seek( vsi_l_offset nOffset, int nWhence );
282 : virtual vsi_l_offset Tell();
283 : virtual size_t Read( void *pBuffer, size_t nSize, size_t nMemb );
284 : virtual int ReadMultiRange( int nRanges, void ** ppData,
285 : const vsi_l_offset* panOffsets, const size_t* panSizes );
286 : virtual size_t Write( const void *pBuffer, size_t nSize, size_t nMemb );
287 : virtual int Eof();
288 : virtual int Flush();
289 : virtual int Close();
290 :
291 798 : int IsKnownFileSize() const { return bHastComputedFileSize; }
292 : vsi_l_offset GetFileSize();
293 : int Exists();
294 798 : int IsDirectory() const { return bIsDirectory; }
295 798 : time_t GetMTime() const { return mTime; }
296 :
297 : int InstallReadCbk(VSICurlReadCbkFunc pfnReadCbk,
298 : void* pfnUserData,
299 : int bStopOnInterrruptUntilUninstall);
300 : int UninstallReadCbk();
301 : };
302 :
303 : /************************************************************************/
304 : /* VSICurlHandle() */
305 : /************************************************************************/
306 :
307 874 : VSICurlHandle::VSICurlHandle(VSICurlFilesystemHandler* poFS, const char* pszURL)
308 : {
309 874 : this->poFS = poFS;
310 874 : this->pszURL = CPLStrdup(pszURL);
311 :
312 874 : curOffset = 0;
313 :
314 874 : CachedFileProp* cachedFileProp = poFS->GetCachedFileProp(pszURL);
315 874 : eExists = cachedFileProp->eExists;
316 874 : fileSize = cachedFileProp->fileSize;
317 874 : bHastComputedFileSize = cachedFileProp->bHastComputedFileSize;
318 874 : bIsDirectory = cachedFileProp->bIsDirectory;
319 874 : mTime = cachedFileProp->mTime;
320 :
321 874 : lastDownloadedOffset = -1;
322 874 : nBlocksToDownload = 1;
323 874 : bEOF = FALSE;
324 :
325 874 : pfnReadCbk = NULL;
326 874 : pReadCbkUserData = NULL;
327 874 : bStopOnInterrruptUntilUninstall = FALSE;
328 874 : bInterrupted = FALSE;
329 874 : }
330 :
331 : /************************************************************************/
332 : /* ~VSICurlHandle() */
333 : /************************************************************************/
334 :
335 874 : VSICurlHandle::~VSICurlHandle()
336 : {
337 874 : CPLFree(pszURL);
338 874 : }
339 :
340 : /************************************************************************/
341 : /* InstallReadCbk() */
342 : /************************************************************************/
343 :
344 1 : int VSICurlHandle::InstallReadCbk(VSICurlReadCbkFunc pfnReadCbkIn,
345 : void* pfnUserDataIn,
346 : int bStopOnInterrruptUntilUninstallIn)
347 : {
348 1 : if (pfnReadCbk != NULL)
349 0 : return FALSE;
350 :
351 1 : pfnReadCbk = pfnReadCbkIn;
352 1 : pReadCbkUserData = pfnUserDataIn;
353 1 : bStopOnInterrruptUntilUninstall = bStopOnInterrruptUntilUninstallIn;
354 1 : bInterrupted = FALSE;
355 1 : return TRUE;
356 : }
357 :
358 : /************************************************************************/
359 : /* UninstallReadCbk() */
360 : /************************************************************************/
361 :
362 1 : int VSICurlHandle::UninstallReadCbk()
363 : {
364 1 : if (pfnReadCbk == NULL)
365 0 : return FALSE;
366 :
367 1 : pfnReadCbk = NULL;
368 1 : pReadCbkUserData = NULL;
369 1 : bStopOnInterrruptUntilUninstall = FALSE;
370 1 : bInterrupted = FALSE;
371 1 : return TRUE;
372 : }
373 :
374 : /************************************************************************/
375 : /* Seek() */
376 : /************************************************************************/
377 :
378 567 : int VSICurlHandle::Seek( vsi_l_offset nOffset, int nWhence )
379 : {
380 567 : if (nWhence == SEEK_SET)
381 : {
382 354 : curOffset = nOffset;
383 : }
384 213 : else if (nWhence == SEEK_CUR)
385 : {
386 152 : curOffset = curOffset + nOffset;
387 : }
388 : else
389 : {
390 61 : curOffset = GetFileSize() + nOffset;
391 : }
392 567 : bEOF = FALSE;
393 567 : return 0;
394 : }
395 :
396 : /************************************************************************/
397 : /* VSICurlSetOptions() */
398 : /************************************************************************/
399 :
400 68 : static void VSICurlSetOptions(CURL* hCurlHandle, const char* pszURL)
401 : {
402 68 : curl_easy_setopt(hCurlHandle, CURLOPT_URL, pszURL);
403 68 : if (CSLTestBoolean(CPLGetConfigOption("CPL_CURL_VERBOSE", "NO")))
404 0 : curl_easy_setopt(hCurlHandle, CURLOPT_VERBOSE, 1);
405 :
406 : /* Set Proxy parameters */
407 68 : const char* pszProxy = CPLGetConfigOption("GDAL_HTTP_PROXY", NULL);
408 68 : if (pszProxy)
409 0 : curl_easy_setopt(hCurlHandle,CURLOPT_PROXY,pszProxy);
410 :
411 68 : const char* pszProxyUserPwd = CPLGetConfigOption("GDAL_HTTP_PROXYUSERPWD", NULL);
412 68 : if (pszProxyUserPwd)
413 0 : curl_easy_setopt(hCurlHandle,CURLOPT_PROXYUSERPWD,pszProxyUserPwd);
414 :
415 : /* Enable following redirections. Requires libcurl 7.10.1 at least */
416 68 : curl_easy_setopt(hCurlHandle, CURLOPT_FOLLOWLOCATION, 1);
417 68 : curl_easy_setopt(hCurlHandle, CURLOPT_MAXREDIRS, 10);
418 :
419 : /* 7.16 */
420 : #if LIBCURL_VERSION_NUM >= 0x071000
421 68 : long option = CURLFTPMETHOD_SINGLECWD;
422 68 : curl_easy_setopt(hCurlHandle, CURLOPT_FTP_FILEMETHOD, option);
423 : #endif
424 :
425 : /* 7.12.3 */
426 : #if LIBCURL_VERSION_NUM > 0x070C03
427 : /* ftp://ftp2.cits.rncan.gc.ca/pub/cantopo/250k_tif/ doesn't like EPSV command */
428 68 : curl_easy_setopt(hCurlHandle, CURLOPT_FTP_USE_EPSV, 0);
429 : #endif
430 :
431 : /* NOSIGNAL should be set to true for timeout to work in multithread
432 : environments on Unix, requires libcurl 7.10 or more recent.
433 : (this force avoiding the use of sgnal handlers) */
434 :
435 : /* 7.10 */
436 : #if LIBCURL_VERSION_NUM >= 0x070A00
437 68 : curl_easy_setopt(hCurlHandle, CURLOPT_NOSIGNAL, 1);
438 : #endif
439 :
440 68 : curl_easy_setopt(hCurlHandle, CURLOPT_NOBODY, 0);
441 68 : curl_easy_setopt(hCurlHandle, CURLOPT_HTTPGET, 1);
442 68 : curl_easy_setopt(hCurlHandle, CURLOPT_HEADER, 0);
443 :
444 : /* 7.16.4 */
445 : #if LIBCURL_VERSION_NUM <= 0x071004
446 : curl_easy_setopt(hCurlHandle, CURLOPT_FTPLISTONLY, 0);
447 : #elif LIBCURL_VERSION_NUM > 0x071004
448 68 : curl_easy_setopt(hCurlHandle, CURLOPT_DIRLISTONLY, 0);
449 : #endif
450 :
451 68 : curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA, NULL);
452 68 : curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION, NULL);
453 68 : }
454 :
455 :
456 : typedef struct
457 : {
458 : char* pBuffer;
459 : size_t nSize;
460 : int bIsHTTP;
461 : int bIsInHeader;
462 : int bMultiRange;
463 : vsi_l_offset nStartOffset;
464 : vsi_l_offset nEndOffset;
465 : int nHTTPCode;
466 : vsi_l_offset nContentLength;
467 : int bFoundContentRange;
468 : int bError;
469 : int bDownloadHeaderOnly;
470 :
471 : VSILFILE *fp;
472 : VSICurlReadCbkFunc pfnReadCbk;
473 : void *pReadCbkUserData;
474 : int bInterrupted;
475 : } WriteFuncStruct;
476 :
477 : /************************************************************************/
478 : /* VSICURLInitWriteFuncStruct() */
479 : /************************************************************************/
480 :
481 119 : static void VSICURLInitWriteFuncStruct(WriteFuncStruct *psStruct,
482 : VSILFILE *fp,
483 : VSICurlReadCbkFunc pfnReadCbk,
484 : void *pReadCbkUserData)
485 : {
486 119 : psStruct->pBuffer = NULL;
487 119 : psStruct->nSize = 0;
488 119 : psStruct->bIsHTTP = FALSE;
489 119 : psStruct->bIsInHeader = TRUE;
490 119 : psStruct->bMultiRange = FALSE;
491 119 : psStruct->nStartOffset = 0;
492 119 : psStruct->nEndOffset = 0;
493 119 : psStruct->nHTTPCode = 0;
494 119 : psStruct->nContentLength = 0;
495 119 : psStruct->bFoundContentRange = FALSE;
496 119 : psStruct->bError = FALSE;
497 119 : psStruct->bDownloadHeaderOnly = FALSE;
498 :
499 119 : psStruct->fp = fp;
500 119 : psStruct->pfnReadCbk = pfnReadCbk;
501 119 : psStruct->pReadCbkUserData = pReadCbkUserData;
502 119 : psStruct->bInterrupted = FALSE;
503 119 : }
504 :
505 : /************************************************************************/
506 : /* VSICurlHandleWriteFunc() */
507 : /************************************************************************/
508 :
509 1500 : static int VSICurlHandleWriteFunc(void *buffer, size_t count, size_t nmemb, void *req)
510 : {
511 1500 : WriteFuncStruct* psStruct = (WriteFuncStruct*) req;
512 1500 : size_t nSize = count * nmemb;
513 :
514 : char* pNewBuffer = (char*) VSIRealloc(psStruct->pBuffer,
515 1500 : psStruct->nSize + nSize + 1);
516 1500 : if (pNewBuffer)
517 : {
518 1500 : psStruct->pBuffer = pNewBuffer;
519 1500 : memcpy(psStruct->pBuffer + psStruct->nSize, buffer, nSize);
520 1500 : psStruct->pBuffer[psStruct->nSize + nSize] = '\0';
521 2072 : if (psStruct->bIsHTTP && psStruct->bIsInHeader)
522 : {
523 574 : char* pszLine = psStruct->pBuffer + psStruct->nSize;
524 617 : if (EQUALN(pszLine, "HTTP/1.0 ", 9) ||
525 : EQUALN(pszLine, "HTTP/1.1 ", 9))
526 43 : psStruct->nHTTPCode = atoi(pszLine + 9);
527 531 : else if (EQUALN(pszLine, "Content-Length: ", 16))
528 : psStruct->nContentLength = CPLScanUIntBig(pszLine + 16,
529 42 : strlen(pszLine + 16));
530 489 : else if (EQUALN(pszLine, "Content-Range: ", 15))
531 9 : psStruct->bFoundContentRange = TRUE;
532 :
533 : /*if (nSize > 2 && pszLine[nSize - 2] == '\r' &&
534 : pszLine[nSize - 1] == '\n')
535 : {
536 : pszLine[nSize - 2] = 0;
537 : CPLDebug("VSICURL", "%s", pszLine);
538 : pszLine[nSize - 2] = '\r';
539 : }*/
540 :
541 574 : if (pszLine[0] == '\r' || pszLine[0] == '\n')
542 : {
543 43 : if (psStruct->bDownloadHeaderOnly)
544 : {
545 : /* If moved permanently/temporarily, go on. Otherwise stop now*/
546 3 : if (!(psStruct->nHTTPCode == 301 || psStruct->nHTTPCode == 302))
547 2 : return 0;
548 : }
549 : else
550 : {
551 40 : psStruct->bIsInHeader = FALSE;
552 :
553 : /* Detect servers that don't support range downloading */
554 40 : if (psStruct->nHTTPCode == 200 &&
555 : !psStruct->bMultiRange &&
556 : !psStruct->bFoundContentRange &&
557 : (psStruct->nStartOffset != 0 || psStruct->nContentLength > 10 *
558 : (psStruct->nEndOffset - psStruct->nStartOffset + 1)))
559 : {
560 : CPLError(CE_Failure, CPLE_AppDefined,
561 0 : "Range downloading not supported by this server !");
562 0 : psStruct->bError = TRUE;
563 0 : return 0;
564 : }
565 : }
566 : }
567 : }
568 : else
569 : {
570 926 : if (psStruct->pfnReadCbk)
571 : {
572 6 : if ( ! psStruct->pfnReadCbk(psStruct->fp, buffer, nSize,
573 : psStruct->pReadCbkUserData) )
574 : {
575 1 : psStruct->bInterrupted = TRUE;
576 1 : return 0;
577 : }
578 : }
579 : }
580 1497 : psStruct->nSize += nSize;
581 1497 : return nmemb;
582 : }
583 : else
584 : {
585 0 : return 0;
586 : }
587 : }
588 :
589 :
590 : /************************************************************************/
591 : /* GetFileSize() */
592 : /************************************************************************/
593 :
594 94 : vsi_l_offset VSICurlHandle::GetFileSize()
595 : {
596 : WriteFuncStruct sWriteFuncData;
597 : WriteFuncStruct sWriteFuncHeaderData;
598 :
599 94 : if (bHastComputedFileSize)
600 86 : return fileSize;
601 :
602 8 : bHastComputedFileSize = TRUE;
603 :
604 : /* Consider that only the files whose extension ends up with one that is */
605 : /* listed in CPL_VSIL_CURL_ALLOWED_EXTENSIONS exist on the server */
606 : /* This can speeds up dramatically open experience, in case the server */
607 : /* cannot return a file list */
608 : /* For example : */
609 : /* gdalinfo --config CPL_VSIL_CURL_ALLOWED_EXTENSIONS ".tif" /vsicurl/http://igskmncngs506.cr.usgs.gov/gmted/Global_tiles_GMTED/075darcsec/bln/W030/30N030W_20101117_gmted_bln075.tif */
610 : const char* pszAllowedExtensions =
611 8 : CPLGetConfigOption("CPL_VSIL_CURL_ALLOWED_EXTENSIONS", NULL);
612 8 : if (pszAllowedExtensions)
613 : {
614 1 : char** papszExtensions = CSLTokenizeString2( pszAllowedExtensions, ", ", 0 );
615 1 : int nURLLen = strlen(pszURL);
616 1 : int bFound = FALSE;
617 1 : for(int i=0;papszExtensions[i] != NULL;i++)
618 : {
619 1 : int nExtensionLen = strlen(papszExtensions[i]);
620 2 : if (nURLLen > nExtensionLen &&
621 1 : EQUAL(pszURL + nURLLen - nExtensionLen, papszExtensions[i]))
622 : {
623 1 : bFound = TRUE;
624 1 : break;
625 : }
626 : }
627 :
628 1 : if (!bFound)
629 : {
630 0 : eExists = EXIST_NO;
631 0 : fileSize = 0;
632 :
633 0 : CachedFileProp* cachedFileProp = poFS->GetCachedFileProp(pszURL);
634 0 : cachedFileProp->bHastComputedFileSize = TRUE;
635 0 : cachedFileProp->fileSize = fileSize;
636 0 : cachedFileProp->eExists = eExists;
637 :
638 0 : CSLDestroy(papszExtensions);
639 :
640 0 : return 0;
641 : }
642 :
643 1 : CSLDestroy(papszExtensions);
644 : }
645 :
646 : #if LIBCURL_VERSION_NUM < 0x070B00
647 : /* Curl 7.10.X doesn't manage to unset the CURLOPT_RANGE that would have been */
648 : /* previously set, so we have to reinit the connection handle */
649 : poFS->GetCurlHandleFor("");
650 : #endif
651 8 : CURL* hCurlHandle = poFS->GetCurlHandleFor(pszURL);
652 :
653 8 : VSICurlSetOptions(hCurlHandle, pszURL);
654 :
655 8 : VSICURLInitWriteFuncStruct(&sWriteFuncHeaderData, NULL, NULL, NULL);
656 :
657 : /* HACK for mbtiles driver: proper fix would be to auto-detect servers that don't accept HEAD */
658 : /* http://a.tiles.mapbox.com/v3/ doesn't accept HEAD, so let's start a GET */
659 : /* and interrupt is as soon as the header is found */
660 8 : if (strstr(pszURL, ".tiles.mapbox.com/") != NULL)
661 : {
662 2 : curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA, &sWriteFuncHeaderData);
663 2 : curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION, VSICurlHandleWriteFunc);
664 :
665 2 : sWriteFuncHeaderData.bIsHTTP = strncmp(pszURL, "http", 4) == 0;
666 2 : sWriteFuncHeaderData.bDownloadHeaderOnly = TRUE;
667 : }
668 : else
669 : {
670 6 : curl_easy_setopt(hCurlHandle, CURLOPT_NOBODY, 1);
671 6 : curl_easy_setopt(hCurlHandle, CURLOPT_HTTPGET, 0);
672 6 : curl_easy_setopt(hCurlHandle, CURLOPT_HEADER, 1);
673 : }
674 :
675 : /* We need that otherwise OSGEO4W's libcurl issue a dummy range request */
676 : /* when doing a HEAD when recycling connections */
677 8 : curl_easy_setopt(hCurlHandle, CURLOPT_RANGE, NULL);
678 :
679 : /* Bug with older curl versions (<=7.16.4) and FTP. See http://curl.haxx.se/mail/lib-2007-08/0312.html */
680 8 : VSICURLInitWriteFuncStruct(&sWriteFuncData, NULL, NULL, NULL);
681 8 : curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData);
682 8 : curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION, VSICurlHandleWriteFunc);
683 :
684 : char szCurlErrBuf[CURL_ERROR_SIZE+1];
685 8 : szCurlErrBuf[0] = '\0';
686 8 : curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER, szCurlErrBuf );
687 :
688 8 : double dfSize = 0;
689 8 : curl_easy_perform(hCurlHandle);
690 :
691 8 : eExists = EXIST_UNKNOWN;
692 :
693 8 : if (strncmp(pszURL, "ftp", 3) == 0)
694 : {
695 0 : if (sWriteFuncData.pBuffer != NULL &&
696 : strncmp(sWriteFuncData.pBuffer, "Content-Length: ", strlen( "Content-Length: ")) == 0)
697 : {
698 0 : const char* pszBuffer = sWriteFuncData.pBuffer + strlen("Content-Length: ");
699 0 : eExists = EXIST_YES;
700 0 : fileSize = CPLScanUIntBig(pszBuffer, sWriteFuncData.nSize - strlen("Content-Length: "));
701 : if (ENABLE_DEBUG)
702 : CPLDebug("VSICURL", "GetFileSize(%s)=" CPL_FRMT_GUIB,
703 0 : pszURL, fileSize);
704 : }
705 : }
706 :
707 8 : if (eExists != EXIST_YES)
708 : {
709 8 : CURLcode code = curl_easy_getinfo(hCurlHandle, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &dfSize );
710 8 : if (code == 0)
711 : {
712 8 : eExists = EXIST_YES;
713 8 : if (dfSize < 0)
714 0 : fileSize = 0;
715 : else
716 8 : fileSize = (GUIntBig)dfSize;
717 : }
718 : else
719 : {
720 0 : eExists = EXIST_NO;
721 0 : fileSize = 0;
722 0 : CPLError(CE_Failure, CPLE_AppDefined, "VSICurlHandle::GetFileSize failed");
723 : }
724 :
725 8 : long response_code = 0;
726 8 : curl_easy_getinfo(hCurlHandle, CURLINFO_HTTP_CODE, &response_code);
727 8 : if (response_code != 200)
728 : {
729 1 : eExists = EXIST_NO;
730 1 : fileSize = 0;
731 : }
732 :
733 : /* Try to guess if this is a directory. Generally if this is a directory, */
734 : /* curl will retry with an URL with slash added */
735 8 : char *pszEffectiveURL = NULL;
736 8 : curl_easy_getinfo(hCurlHandle, CURLINFO_EFFECTIVE_URL, &pszEffectiveURL);
737 15 : if (pszEffectiveURL != NULL && strncmp(pszURL, pszEffectiveURL, strlen(pszURL)) == 0 &&
738 7 : pszEffectiveURL[strlen(pszURL)] == '/')
739 : {
740 1 : eExists = EXIST_YES;
741 1 : fileSize = 0;
742 1 : bIsDirectory = TRUE;
743 : }
744 :
745 : if (ENABLE_DEBUG)
746 : CPLDebug("VSICURL", "GetFileSize(%s)=" CPL_FRMT_GUIB " response_code=%d",
747 8 : pszURL, fileSize, (int)response_code);
748 : }
749 :
750 8 : CPLFree(sWriteFuncData.pBuffer);
751 8 : CPLFree(sWriteFuncHeaderData.pBuffer);
752 :
753 8 : CachedFileProp* cachedFileProp = poFS->GetCachedFileProp(pszURL);
754 8 : cachedFileProp->bHastComputedFileSize = TRUE;
755 8 : cachedFileProp->fileSize = fileSize;
756 8 : cachedFileProp->eExists = eExists;
757 8 : cachedFileProp->bIsDirectory = bIsDirectory;
758 :
759 8 : return fileSize;
760 : }
761 :
762 : /************************************************************************/
763 : /* Exists() */
764 : /************************************************************************/
765 :
766 815 : int VSICurlHandle::Exists()
767 : {
768 815 : if (eExists == EXIST_UNKNOWN)
769 6 : GetFileSize();
770 815 : return eExists == EXIST_YES;
771 : }
772 :
773 : /************************************************************************/
774 : /* Tell() */
775 : /************************************************************************/
776 :
777 160 : vsi_l_offset VSICurlHandle::Tell()
778 : {
779 160 : return curOffset;
780 : }
781 :
782 : /************************************************************************/
783 : /* DownloadRegion() */
784 : /************************************************************************/
785 :
786 42 : int VSICurlHandle::DownloadRegion(vsi_l_offset startOffset, int nBlocks)
787 : {
788 : WriteFuncStruct sWriteFuncData;
789 : WriteFuncStruct sWriteFuncHeaderData;
790 :
791 42 : if (bInterrupted && bStopOnInterrruptUntilUninstall)
792 0 : return FALSE;
793 :
794 42 : CachedFileProp* cachedFileProp = poFS->GetCachedFileProp(pszURL);
795 42 : if (cachedFileProp->eExists == EXIST_NO)
796 0 : return FALSE;
797 :
798 42 : CURL* hCurlHandle = poFS->GetCurlHandleFor(pszURL);
799 42 : VSICurlSetOptions(hCurlHandle, pszURL);
800 :
801 42 : VSICURLInitWriteFuncStruct(&sWriteFuncData, (VSILFILE*)this, pfnReadCbk, pReadCbkUserData);
802 42 : curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData);
803 42 : curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION, VSICurlHandleWriteFunc);
804 :
805 42 : VSICURLInitWriteFuncStruct(&sWriteFuncHeaderData, NULL, NULL, NULL);
806 42 : curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA, &sWriteFuncHeaderData);
807 42 : curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION, VSICurlHandleWriteFunc);
808 42 : sWriteFuncHeaderData.bIsHTTP = strncmp(pszURL, "http", 4) == 0;
809 42 : sWriteFuncHeaderData.nStartOffset = startOffset;
810 42 : sWriteFuncHeaderData.nEndOffset = startOffset + nBlocks * DOWNLOAD_CHUNCK_SIZE - 1;
811 :
812 : char rangeStr[512];
813 42 : sprintf(rangeStr, CPL_FRMT_GUIB "-" CPL_FRMT_GUIB, startOffset, startOffset + nBlocks * DOWNLOAD_CHUNCK_SIZE - 1);
814 :
815 : if (ENABLE_DEBUG)
816 42 : CPLDebug("VSICURL", "Downloading %s (%s)...", rangeStr, pszURL);
817 :
818 42 : curl_easy_setopt(hCurlHandle, CURLOPT_RANGE, rangeStr);
819 :
820 : char szCurlErrBuf[CURL_ERROR_SIZE+1];
821 42 : szCurlErrBuf[0] = '\0';
822 42 : curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER, szCurlErrBuf );
823 :
824 42 : curl_easy_perform(hCurlHandle);
825 :
826 42 : curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, NULL);
827 42 : curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION, NULL);
828 42 : curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA, NULL);
829 42 : curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION, NULL);
830 :
831 42 : if (sWriteFuncData.bInterrupted)
832 : {
833 1 : bInterrupted = TRUE;
834 :
835 1 : CPLFree(sWriteFuncData.pBuffer);
836 1 : CPLFree(sWriteFuncHeaderData.pBuffer);
837 :
838 1 : return FALSE;
839 : }
840 :
841 41 : long response_code = 0;
842 41 : curl_easy_getinfo(hCurlHandle, CURLINFO_HTTP_CODE, &response_code);
843 :
844 41 : char *content_type = 0;
845 41 : curl_easy_getinfo(hCurlHandle, CURLINFO_CONTENT_TYPE, &content_type);
846 :
847 : if (ENABLE_DEBUG)
848 41 : CPLDebug("VSICURL", "Got reponse_code=%ld", response_code);
849 :
850 41 : if ((response_code != 200 && response_code != 206 &&
851 : response_code != 225 && response_code != 226 && response_code != 426) || sWriteFuncHeaderData.bError)
852 : {
853 0 : if (response_code >= 400 && szCurlErrBuf[0] != '\0')
854 : {
855 0 : if (strcmp(szCurlErrBuf, "Couldn't use REST") == 0)
856 : CPLError(CE_Failure, CPLE_AppDefined, "%d: %s, %s",
857 : (int)response_code, szCurlErrBuf,
858 0 : "Range downloading not supported by this server !");
859 : else
860 0 : CPLError(CE_Failure, CPLE_AppDefined, "%d: %s", (int)response_code, szCurlErrBuf);
861 : }
862 0 : if (!bHastComputedFileSize && startOffset == 0)
863 : {
864 0 : cachedFileProp->bHastComputedFileSize = bHastComputedFileSize = TRUE;
865 0 : cachedFileProp->fileSize = fileSize = 0;
866 0 : cachedFileProp->eExists = eExists = EXIST_NO;
867 : }
868 0 : CPLFree(sWriteFuncData.pBuffer);
869 0 : CPLFree(sWriteFuncHeaderData.pBuffer);
870 0 : return FALSE;
871 : }
872 :
873 41 : if (!bHastComputedFileSize && sWriteFuncHeaderData.pBuffer)
874 : {
875 : /* Try to retrieve the filesize from the HTTP headers */
876 : /* if in the form : "Content-Range: bytes x-y/filesize" */
877 9 : char* pszContentRange = strstr(sWriteFuncHeaderData.pBuffer, "Content-Range: bytes ");
878 9 : if (pszContentRange)
879 : {
880 4 : char* pszEOL = strchr(pszContentRange, '\n');
881 4 : if (pszEOL)
882 : {
883 4 : *pszEOL = 0;
884 4 : pszEOL = strchr(pszContentRange, '\r');
885 4 : if (pszEOL)
886 4 : *pszEOL = 0;
887 4 : char* pszSlash = strchr(pszContentRange, '/');
888 4 : if (pszSlash)
889 : {
890 4 : pszSlash ++;
891 4 : fileSize = CPLScanUIntBig(pszSlash, strlen(pszSlash));
892 : }
893 : }
894 : }
895 5 : else if (strncmp(pszURL, "ftp", 3) == 0)
896 : {
897 : /* Parse 213 answer for FTP protocol */
898 0 : char* pszSize = strstr(sWriteFuncHeaderData.pBuffer, "213 ");
899 0 : if (pszSize)
900 : {
901 0 : pszSize += 4;
902 0 : char* pszEOL = strchr(pszSize, '\n');
903 0 : if (pszEOL)
904 : {
905 0 : *pszEOL = 0;
906 0 : pszEOL = strchr(pszSize, '\r');
907 0 : if (pszEOL)
908 0 : *pszEOL = 0;
909 :
910 0 : fileSize = CPLScanUIntBig(pszSize, strlen(pszSize));
911 : }
912 : }
913 : }
914 :
915 9 : if (fileSize != 0)
916 : {
917 4 : eExists = EXIST_YES;
918 :
919 : if (ENABLE_DEBUG)
920 : CPLDebug("VSICURL", "GetFileSize(%s)=" CPL_FRMT_GUIB " response_code=%d",
921 4 : pszURL, fileSize, (int)response_code);
922 :
923 4 : bHastComputedFileSize = cachedFileProp->bHastComputedFileSize = TRUE;
924 4 : cachedFileProp->fileSize = fileSize;
925 4 : cachedFileProp->eExists = eExists;
926 : }
927 : }
928 :
929 41 : lastDownloadedOffset = startOffset + nBlocks * DOWNLOAD_CHUNCK_SIZE;
930 :
931 41 : char* pBuffer = sWriteFuncData.pBuffer;
932 41 : int nSize = sWriteFuncData.nSize;
933 :
934 41 : if (nSize > nBlocks * DOWNLOAD_CHUNCK_SIZE)
935 : {
936 : if (ENABLE_DEBUG)
937 : CPLDebug("VSICURL", "Got more data than expected : %d instead of %d",
938 0 : nSize, nBlocks * DOWNLOAD_CHUNCK_SIZE);
939 : }
940 :
941 126 : while(nSize > 0)
942 : {
943 : //if (ENABLE_DEBUG)
944 : // CPLDebug("VSICURL", "Add region %d - %d", startOffset, MIN(DOWNLOAD_CHUNCK_SIZE, nSize));
945 44 : poFS->AddRegion(pszURL, startOffset, MIN(DOWNLOAD_CHUNCK_SIZE, nSize), pBuffer);
946 44 : startOffset += DOWNLOAD_CHUNCK_SIZE;
947 44 : pBuffer += DOWNLOAD_CHUNCK_SIZE;
948 44 : nSize -= DOWNLOAD_CHUNCK_SIZE;
949 : }
950 :
951 41 : CPLFree(sWriteFuncData.pBuffer);
952 41 : CPLFree(sWriteFuncHeaderData.pBuffer);
953 :
954 41 : return TRUE;
955 : }
956 :
957 : /************************************************************************/
958 : /* Read() */
959 : /************************************************************************/
960 :
961 6130 : size_t VSICurlHandle::Read( void *pBuffer, size_t nSize, size_t nMemb )
962 : {
963 6130 : size_t nBufferRequestSize = nSize * nMemb;
964 6130 : if (nBufferRequestSize == 0)
965 0 : return 0;
966 :
967 : //CPLDebug("VSICURL", "offset=%d, size=%d", (int)curOffset, (int)nBufferRequestSize);
968 :
969 6130 : vsi_l_offset iterOffset = curOffset;
970 18380 : while (nBufferRequestSize)
971 : {
972 6132 : const CachedRegion* psRegion = poFS->GetRegion(pszURL, iterOffset);
973 6132 : if (psRegion == NULL)
974 : {
975 : vsi_l_offset nOffsetToDownload =
976 42 : (iterOffset / DOWNLOAD_CHUNCK_SIZE) * DOWNLOAD_CHUNCK_SIZE;
977 :
978 42 : if (nOffsetToDownload == lastDownloadedOffset)
979 : {
980 : /* In case of consecutive reads (of small size), we use a */
981 : /* heuristic that we will read the file sequentially, so */
982 : /* we double the requested size to decrease the number of */
983 : /* client/server roundtrips. */
984 1 : if (nBlocksToDownload < 100)
985 1 : nBlocksToDownload *= 2;
986 : }
987 : else
988 : {
989 : /* Random reads. Cancel the above heuristics */
990 41 : nBlocksToDownload = 1;
991 : }
992 :
993 : /* Ensure that we will request at least the number of blocks */
994 : /* to satisfy the remaining buffer size to read */
995 : vsi_l_offset nEndOffsetToDownload =
996 42 : ((iterOffset + nBufferRequestSize) / DOWNLOAD_CHUNCK_SIZE) * DOWNLOAD_CHUNCK_SIZE;
997 : int nMinBlocksToDownload = 1 + (int)
998 42 : ((nEndOffsetToDownload - nOffsetToDownload) / DOWNLOAD_CHUNCK_SIZE);
999 42 : if (nBlocksToDownload < nMinBlocksToDownload)
1000 2 : nBlocksToDownload = nMinBlocksToDownload;
1001 :
1002 : int i;
1003 : /* Avoid reading already cached data */
1004 45 : for(i=1;i<nBlocksToDownload;i++)
1005 : {
1006 3 : if (poFS->GetRegion(pszURL, nOffsetToDownload + i * DOWNLOAD_CHUNCK_SIZE) != NULL)
1007 : {
1008 0 : nBlocksToDownload = i;
1009 0 : break;
1010 : }
1011 : }
1012 :
1013 42 : if (DownloadRegion(nOffsetToDownload, nBlocksToDownload) == FALSE)
1014 : {
1015 1 : if (!bInterrupted)
1016 0 : bEOF = TRUE;
1017 1 : return 0;
1018 : }
1019 41 : psRegion = poFS->GetRegion(pszURL, iterOffset);
1020 : }
1021 6131 : if (psRegion == NULL || psRegion->pData == NULL)
1022 : {
1023 0 : bEOF = TRUE;
1024 0 : return 0;
1025 : }
1026 6131 : int nToCopy = (int) MIN(nBufferRequestSize, psRegion->nSize - (iterOffset - psRegion->nFileOffsetStart));
1027 : memcpy(pBuffer, psRegion->pData + iterOffset - psRegion->nFileOffsetStart,
1028 6131 : nToCopy);
1029 6131 : pBuffer = (char*) pBuffer + nToCopy;
1030 6131 : iterOffset += nToCopy;
1031 6131 : nBufferRequestSize -= nToCopy;
1032 6131 : if (psRegion->nSize != DOWNLOAD_CHUNCK_SIZE && nBufferRequestSize != 0)
1033 : {
1034 11 : break;
1035 : }
1036 : }
1037 :
1038 6129 : size_t ret = (size_t) ((iterOffset - curOffset) / nSize);
1039 6129 : if (ret != nMemb)
1040 11 : bEOF = TRUE;
1041 :
1042 6129 : curOffset = iterOffset;
1043 :
1044 6129 : return ret;
1045 : }
1046 :
1047 :
1048 : /************************************************************************/
1049 : /* ReadMultiRange() */
1050 : /************************************************************************/
1051 :
1052 1 : int VSICurlHandle::ReadMultiRange( int nRanges, void ** ppData,
1053 : const vsi_l_offset* panOffsets,
1054 : const size_t* panSizes )
1055 : {
1056 : WriteFuncStruct sWriteFuncData;
1057 : WriteFuncStruct sWriteFuncHeaderData;
1058 :
1059 1 : if (bInterrupted && bStopOnInterrruptUntilUninstall)
1060 0 : return FALSE;
1061 :
1062 1 : CachedFileProp* cachedFileProp = poFS->GetCachedFileProp(pszURL);
1063 1 : if (cachedFileProp->eExists == EXIST_NO)
1064 0 : return -1;
1065 :
1066 1 : CPLString osRanges, osFirstRange, osLastRange;
1067 : int i;
1068 1 : int nMergedRanges = 0;
1069 1 : vsi_l_offset nTotalReqSize = 0;
1070 129 : for(i=0;i<nRanges;i++)
1071 : {
1072 128 : CPLString osCurRange;
1073 128 : if (i != 0)
1074 127 : osRanges.append(",");
1075 128 : osCurRange = CPLSPrintf(CPL_FRMT_GUIB "-", panOffsets[i]);
1076 256 : while (i + 1 < nRanges && panOffsets[i] + panSizes[i] == panOffsets[i+1])
1077 : {
1078 0 : nTotalReqSize += panSizes[i];
1079 0 : i ++;
1080 : }
1081 128 : nTotalReqSize += panSizes[i];
1082 128 : osCurRange.append(CPLSPrintf(CPL_FRMT_GUIB, panOffsets[i] + panSizes[i]-1));
1083 128 : nMergedRanges ++;
1084 :
1085 128 : osRanges += osCurRange;
1086 :
1087 128 : if (nMergedRanges == 1)
1088 1 : osFirstRange = osCurRange;
1089 128 : osLastRange = osCurRange;
1090 : }
1091 :
1092 1 : const char* pszMaxRanges = CPLGetConfigOption("CPL_VSIL_CURL_MAX_RANGES", "250");
1093 1 : int nMaxRanges = atoi(pszMaxRanges);
1094 1 : if (nMaxRanges <= 0)
1095 0 : nMaxRanges = 250;
1096 1 : if (nMergedRanges > nMaxRanges)
1097 : {
1098 0 : int nHalf = nRanges / 2;
1099 0 : int nRet = ReadMultiRange(nHalf, ppData, panOffsets, panSizes);
1100 0 : if (nRet != 0)
1101 0 : return nRet;
1102 0 : return ReadMultiRange(nRanges - nHalf, ppData + nHalf, panOffsets + nHalf, panSizes + nHalf);
1103 : }
1104 :
1105 1 : CURL* hCurlHandle = poFS->GetCurlHandleFor(pszURL);
1106 1 : VSICurlSetOptions(hCurlHandle, pszURL);
1107 :
1108 1 : VSICURLInitWriteFuncStruct(&sWriteFuncData, (VSILFILE*)this, pfnReadCbk, pReadCbkUserData);
1109 1 : curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData);
1110 1 : curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION, VSICurlHandleWriteFunc);
1111 :
1112 1 : VSICURLInitWriteFuncStruct(&sWriteFuncHeaderData, NULL, NULL, NULL);
1113 1 : curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA, &sWriteFuncHeaderData);
1114 1 : curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION, VSICurlHandleWriteFunc);
1115 1 : sWriteFuncHeaderData.bIsHTTP = strncmp(pszURL, "http", 4) == 0;
1116 1 : sWriteFuncHeaderData.bMultiRange = nMergedRanges > 1;
1117 1 : if (nMergedRanges == 1)
1118 : {
1119 0 : sWriteFuncHeaderData.nStartOffset = panOffsets[0];
1120 0 : sWriteFuncHeaderData.nEndOffset = panOffsets[0] + nTotalReqSize-1;
1121 : }
1122 :
1123 : if (ENABLE_DEBUG)
1124 : {
1125 1 : if (nMergedRanges == 1)
1126 0 : CPLDebug("VSICURL", "Downloading %s (%s)...", osRanges.c_str(), pszURL);
1127 : else
1128 : CPLDebug("VSICURL", "Downloading %s, ..., %s (" CPL_FRMT_GUIB " bytes, %s)...",
1129 1 : osFirstRange.c_str(), osLastRange.c_str(), (GUIntBig)nTotalReqSize, pszURL);
1130 : }
1131 :
1132 1 : curl_easy_setopt(hCurlHandle, CURLOPT_RANGE, osRanges.c_str());
1133 :
1134 : char szCurlErrBuf[CURL_ERROR_SIZE+1];
1135 1 : szCurlErrBuf[0] = '\0';
1136 1 : curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER, szCurlErrBuf );
1137 :
1138 1 : curl_easy_perform(hCurlHandle);
1139 :
1140 1 : curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, NULL);
1141 1 : curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION, NULL);
1142 1 : curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA, NULL);
1143 1 : curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION, NULL);
1144 :
1145 1 : if (sWriteFuncData.bInterrupted)
1146 : {
1147 0 : bInterrupted = TRUE;
1148 :
1149 0 : CPLFree(sWriteFuncData.pBuffer);
1150 0 : CPLFree(sWriteFuncHeaderData.pBuffer);
1151 :
1152 0 : return -1;
1153 : }
1154 :
1155 1 : long response_code = 0;
1156 1 : curl_easy_getinfo(hCurlHandle, CURLINFO_HTTP_CODE, &response_code);
1157 :
1158 1 : char *content_type = 0;
1159 1 : curl_easy_getinfo(hCurlHandle, CURLINFO_CONTENT_TYPE, &content_type);
1160 :
1161 1 : if ((response_code != 200 && response_code != 206 &&
1162 : response_code != 225 && response_code != 226 && response_code != 426) || sWriteFuncHeaderData.bError)
1163 : {
1164 0 : if (response_code >= 400 && szCurlErrBuf[0] != '\0')
1165 : {
1166 0 : if (strcmp(szCurlErrBuf, "Couldn't use REST") == 0)
1167 : CPLError(CE_Failure, CPLE_AppDefined, "%d: %s, %s",
1168 : (int)response_code, szCurlErrBuf,
1169 0 : "Range downloading not supported by this server !");
1170 : else
1171 0 : CPLError(CE_Failure, CPLE_AppDefined, "%d: %s", (int)response_code, szCurlErrBuf);
1172 : }
1173 : /*
1174 : if (!bHastComputedFileSize && startOffset == 0)
1175 : {
1176 : cachedFileProp->bHastComputedFileSize = bHastComputedFileSize = TRUE;
1177 : cachedFileProp->fileSize = fileSize = 0;
1178 : cachedFileProp->eExists = eExists = EXIST_NO;
1179 : }
1180 : */
1181 0 : CPLFree(sWriteFuncData.pBuffer);
1182 0 : CPLFree(sWriteFuncHeaderData.pBuffer);
1183 0 : return -1;
1184 : }
1185 :
1186 1 : char* pBuffer = sWriteFuncData.pBuffer;
1187 1 : int nSize = sWriteFuncData.nSize;
1188 :
1189 1 : int nRet = -1;
1190 : char* pszBoundary;
1191 1 : CPLString osBoundary;
1192 : char *pszNext;
1193 1 : int iRange = 0;
1194 1 : int iPart = 0;
1195 : char* pszEOL;
1196 :
1197 : /* -------------------------------------------------------------------- */
1198 : /* No multipart if a single range has been requested */
1199 : /* -------------------------------------------------------------------- */
1200 :
1201 1 : if (nMergedRanges == 1)
1202 : {
1203 0 : int nAccSize = 0;
1204 0 : if ((vsi_l_offset)nSize < nTotalReqSize)
1205 0 : goto end;
1206 :
1207 0 : for(i=0;i<nRanges;i++)
1208 : {
1209 0 : memcpy(ppData[i], pBuffer + nAccSize, panSizes[i]);
1210 0 : nAccSize += panSizes[i];
1211 : }
1212 :
1213 0 : nRet = 0;
1214 0 : goto end;
1215 : }
1216 :
1217 : /* -------------------------------------------------------------------- */
1218 : /* Extract boundary name */
1219 : /* -------------------------------------------------------------------- */
1220 :
1221 : pszBoundary = strstr(sWriteFuncHeaderData.pBuffer,
1222 1 : "Content-Type: multipart/byteranges; boundary=");
1223 1 : if( pszBoundary == NULL )
1224 : {
1225 : CPLError( CE_Failure, CPLE_AppDefined, "Could not find '%s'",
1226 0 : "Content-Type: multipart/byteranges; boundary=" );
1227 0 : goto end;
1228 : }
1229 :
1230 1 : pszBoundary += strlen( "Content-Type: multipart/byteranges; boundary=" );
1231 :
1232 1 : pszEOL = strchr(pszBoundary, '\r');
1233 1 : if (pszEOL)
1234 1 : *pszEOL = 0;
1235 1 : pszEOL = strchr(pszBoundary, '\n');
1236 1 : if (pszEOL)
1237 0 : *pszEOL = 0;
1238 :
1239 : /* Remove optional double-quote character around boundary name */
1240 1 : if (pszBoundary[0] == '"')
1241 : {
1242 0 : pszBoundary ++;
1243 0 : char* pszLastDoubleQuote = strrchr(pszBoundary, '"');
1244 0 : if (pszLastDoubleQuote)
1245 0 : *pszLastDoubleQuote = 0;
1246 : }
1247 :
1248 1 : osBoundary = "--";
1249 1 : osBoundary += pszBoundary;
1250 :
1251 : /* -------------------------------------------------------------------- */
1252 : /* Find the start of the first chunk. */
1253 : /* -------------------------------------------------------------------- */
1254 1 : pszNext = strstr(pBuffer,osBoundary.c_str());
1255 1 : if( pszNext == NULL )
1256 : {
1257 0 : CPLError( CE_Failure, CPLE_AppDefined, "No parts found." );
1258 0 : goto end;
1259 : }
1260 :
1261 1 : pszNext += strlen(osBoundary);
1262 2 : while( *pszNext != '\n' && *pszNext != '\r' && *pszNext != '\0' )
1263 0 : pszNext++;
1264 1 : if( *pszNext == '\r' )
1265 1 : pszNext++;
1266 1 : if( *pszNext == '\n' )
1267 1 : pszNext++;
1268 :
1269 : /* -------------------------------------------------------------------- */
1270 : /* Loop over parts... */
1271 : /* -------------------------------------------------------------------- */
1272 129 : while( iPart < nRanges )
1273 : {
1274 : /* -------------------------------------------------------------------- */
1275 : /* Collect headers. */
1276 : /* -------------------------------------------------------------------- */
1277 128 : int bExpectedRange = FALSE;
1278 :
1279 512 : while( *pszNext != '\n' && *pszNext != '\r' && *pszNext != '\0' )
1280 : {
1281 256 : char *pszEOL = strstr(pszNext,"\n");
1282 :
1283 256 : if( pszEOL == NULL )
1284 : {
1285 : CPLError(CE_Failure, CPLE_AppDefined,
1286 0 : "Error while parsing multipart content (at line %d)", __LINE__);
1287 0 : goto end;
1288 : }
1289 :
1290 256 : *pszEOL = '\0';
1291 256 : int bRestoreAntislashR = FALSE;
1292 256 : if (pszEOL - pszNext > 1 && pszEOL[-1] == '\r')
1293 : {
1294 256 : bRestoreAntislashR = TRUE;
1295 256 : pszEOL[-1] = '\0';
1296 : }
1297 :
1298 256 : if (EQUALN(pszNext, "Content-Range: bytes ", strlen("Content-Range: bytes ")))
1299 : {
1300 128 : bExpectedRange = TRUE; /* FIXME */
1301 : }
1302 :
1303 256 : if (bRestoreAntislashR)
1304 256 : pszEOL[-1] = '\r';
1305 256 : *pszEOL = '\n';
1306 :
1307 256 : pszNext = pszEOL + 1;
1308 : }
1309 :
1310 128 : if (!bExpectedRange)
1311 : {
1312 : CPLError(CE_Failure, CPLE_AppDefined,
1313 0 : "Error while parsing multipart content (at line %d)", __LINE__);
1314 0 : goto end;
1315 : }
1316 :
1317 128 : if( *pszNext == '\r' )
1318 128 : pszNext++;
1319 128 : if( *pszNext == '\n' )
1320 128 : pszNext++;
1321 :
1322 : /* -------------------------------------------------------------------- */
1323 : /* Work out the data block size. */
1324 : /* -------------------------------------------------------------------- */
1325 128 : size_t nBytesAvail = nSize - (pszNext - pBuffer);
1326 :
1327 0 : while(TRUE)
1328 : {
1329 128 : if (nBytesAvail < panSizes[iRange])
1330 : {
1331 : CPLError(CE_Failure, CPLE_AppDefined,
1332 0 : "Error while parsing multipart content (at line %d)", __LINE__);
1333 0 : goto end;
1334 : }
1335 :
1336 128 : memcpy(ppData[iRange], pszNext, panSizes[iRange]);
1337 128 : pszNext += panSizes[iRange];
1338 128 : nBytesAvail -= panSizes[iRange];
1339 509 : if( iRange + 1 < nRanges &&
1340 381 : panOffsets[iRange] + panSizes[iRange] == panOffsets[iRange + 1] )
1341 : {
1342 0 : iRange++;
1343 : }
1344 : else
1345 : break;
1346 : }
1347 :
1348 128 : iPart ++;
1349 128 : iRange ++;
1350 :
1351 512 : while( nBytesAvail > 0
1352 : && (*pszNext != '-'
1353 : || strncmp(pszNext,osBoundary,strlen(osBoundary)) != 0) )
1354 : {
1355 256 : pszNext++;
1356 256 : nBytesAvail--;
1357 : }
1358 :
1359 128 : if( nBytesAvail == 0 )
1360 : {
1361 : CPLError(CE_Failure, CPLE_AppDefined,
1362 0 : "Error while parsing multipart content (at line %d)", __LINE__);
1363 0 : goto end;
1364 : }
1365 :
1366 128 : pszNext += strlen(osBoundary);
1367 128 : if( strncmp(pszNext,"--",2) == 0 )
1368 : {
1369 : /* End of multipart */
1370 1 : break;
1371 : }
1372 :
1373 127 : if( *pszNext == '\r' )
1374 127 : pszNext++;
1375 127 : if( *pszNext == '\n' )
1376 127 : pszNext++;
1377 : else
1378 : {
1379 : CPLError(CE_Failure, CPLE_AppDefined,
1380 0 : "Error while parsing multipart content (at line %d)", __LINE__);
1381 0 : goto end;
1382 : }
1383 : }
1384 :
1385 1 : if (iPart == nMergedRanges)
1386 1 : nRet = 0;
1387 : else
1388 : CPLError(CE_Failure, CPLE_AppDefined,
1389 0 : "Got only %d parts, where %d were expected", iPart, nMergedRanges);
1390 :
1391 : end:
1392 1 : CPLFree(sWriteFuncData.pBuffer);
1393 1 : CPLFree(sWriteFuncHeaderData.pBuffer);
1394 :
1395 1 : return nRet;
1396 : }
1397 :
1398 : /************************************************************************/
1399 : /* Write() */
1400 : /************************************************************************/
1401 :
1402 0 : size_t VSICurlHandle::Write( const void *pBuffer, size_t nSize, size_t nMemb )
1403 : {
1404 0 : return 0;
1405 : }
1406 :
1407 : /************************************************************************/
1408 : /* Eof() */
1409 : /************************************************************************/
1410 :
1411 :
1412 7 : int VSICurlHandle::Eof()
1413 : {
1414 7 : return bEOF;
1415 : }
1416 :
1417 : /************************************************************************/
1418 : /* Flush() */
1419 : /************************************************************************/
1420 :
1421 0 : int VSICurlHandle::Flush()
1422 : {
1423 0 : return 0;
1424 : }
1425 :
1426 : /************************************************************************/
1427 : /* Close() */
1428 : /************************************************************************/
1429 :
1430 74 : int VSICurlHandle::Close()
1431 : {
1432 74 : return 0;
1433 : }
1434 :
1435 :
1436 :
1437 :
1438 : /************************************************************************/
1439 : /* VSICurlFilesystemHandler() */
1440 : /************************************************************************/
1441 :
1442 712 : VSICurlFilesystemHandler::VSICurlFilesystemHandler()
1443 : {
1444 712 : hMutex = NULL;
1445 712 : papsRegions = NULL;
1446 712 : nRegions = 0;
1447 712 : bUseCacheDisk = CSLTestBoolean(CPLGetConfigOption("CPL_VSIL_CURL_USE_CACHE", "NO"));
1448 712 : }
1449 :
1450 : /************************************************************************/
1451 : /* ~VSICurlFilesystemHandler() */
1452 : /************************************************************************/
1453 :
1454 687 : VSICurlFilesystemHandler::~VSICurlFilesystemHandler()
1455 : {
1456 : int i;
1457 731 : for(i=0;i<nRegions;i++)
1458 : {
1459 44 : CPLFree(papsRegions[i]->pData);
1460 44 : CPLFree(papsRegions[i]);
1461 : }
1462 687 : CPLFree(papsRegions);
1463 :
1464 687 : std::map<CPLString, CachedFileProp*>::const_iterator iterCacheFileSize;
1465 :
1466 1272 : for( iterCacheFileSize = cacheFileSize.begin(); iterCacheFileSize != cacheFileSize.end(); iterCacheFileSize++ )
1467 : {
1468 585 : CPLFree(iterCacheFileSize->second);
1469 : }
1470 :
1471 687 : std::map<CPLString, CachedDirList*>::const_iterator iterCacheDirList;
1472 :
1473 705 : for( iterCacheDirList = cacheDirList.begin(); iterCacheDirList != cacheDirList.end(); iterCacheDirList++ )
1474 : {
1475 18 : CSLDestroy(iterCacheDirList->second->papszFileList);
1476 18 : CPLFree(iterCacheDirList->second);
1477 : }
1478 :
1479 687 : std::map<GIntBig, CachedConnection*>::const_iterator iterConnections;
1480 689 : for( iterConnections = mapConnections.begin(); iterConnections != mapConnections.end(); iterConnections++ )
1481 : {
1482 2 : curl_easy_cleanup(iterConnections->second->hCurlHandle);
1483 2 : delete iterConnections->second;
1484 : }
1485 :
1486 687 : if( hMutex != NULL )
1487 2 : CPLDestroyMutex( hMutex );
1488 687 : hMutex = NULL;
1489 687 : }
1490 :
1491 : /************************************************************************/
1492 : /* GetCurlHandleFor() */
1493 : /************************************************************************/
1494 :
1495 68 : CURL* VSICurlFilesystemHandler::GetCurlHandleFor(CPLString osURL)
1496 : {
1497 68 : CPLMutexHolder oHolder( &hMutex );
1498 :
1499 68 : std::map<GIntBig, CachedConnection*>::const_iterator iterConnections;
1500 :
1501 68 : iterConnections = mapConnections.find(CPLGetPID());
1502 68 : if (iterConnections == mapConnections.end())
1503 : {
1504 2 : CURL* hCurlHandle = curl_easy_init();
1505 2 : CachedConnection* psCachedConnection = new CachedConnection;
1506 2 : psCachedConnection->osURL = osURL;
1507 2 : psCachedConnection->hCurlHandle = hCurlHandle;
1508 2 : mapConnections[CPLGetPID()] = psCachedConnection;
1509 2 : return hCurlHandle;
1510 : }
1511 : else
1512 : {
1513 66 : CachedConnection* psCachedConnection = iterConnections->second;
1514 66 : if (osURL == psCachedConnection->osURL)
1515 33 : return psCachedConnection->hCurlHandle;
1516 :
1517 33 : const char* pszURL = osURL.c_str();
1518 33 : const char* pszEndOfServ = strchr(pszURL, '.');
1519 33 : if (pszEndOfServ != NULL)
1520 33 : pszEndOfServ = strchr(pszEndOfServ, '/');
1521 33 : if (pszEndOfServ == NULL)
1522 0 : pszURL = pszURL + strlen(pszURL);
1523 : int bReinitConnection = strncmp(psCachedConnection->osURL,
1524 33 : pszURL, pszEndOfServ-pszURL) != 0;
1525 :
1526 33 : if (bReinitConnection)
1527 : {
1528 7 : if (psCachedConnection->hCurlHandle)
1529 7 : curl_easy_cleanup(psCachedConnection->hCurlHandle);
1530 7 : psCachedConnection->hCurlHandle = curl_easy_init();
1531 : }
1532 33 : psCachedConnection->osURL = osURL;
1533 :
1534 33 : return psCachedConnection->hCurlHandle;
1535 0 : }
1536 : }
1537 :
1538 :
1539 : /************************************************************************/
1540 : /* GetRegionFromCacheDisk() */
1541 : /************************************************************************/
1542 :
1543 : const CachedRegion*
1544 0 : VSICurlFilesystemHandler::GetRegionFromCacheDisk(const char* pszURL,
1545 : vsi_l_offset nFileOffsetStart)
1546 : {
1547 0 : nFileOffsetStart = (nFileOffsetStart / DOWNLOAD_CHUNCK_SIZE) * DOWNLOAD_CHUNCK_SIZE;
1548 0 : VSILFILE* fp = VSIFOpenL(VSICurlGetCacheFileName(), "rb");
1549 0 : if (fp)
1550 : {
1551 0 : unsigned long pszURLHash = CPLHashSetHashStr(pszURL);
1552 : unsigned long pszURLHashCached;
1553 : vsi_l_offset nFileOffsetStartCached;
1554 : size_t nSizeCached;
1555 0 : while(TRUE)
1556 : {
1557 0 : if (VSIFReadL(&pszURLHashCached, 1, sizeof(unsigned long), fp) == 0)
1558 : break;
1559 0 : VSIFReadL(&nFileOffsetStartCached, 1, sizeof(vsi_l_offset), fp);
1560 0 : VSIFReadL(&nSizeCached, 1, sizeof(size_t), fp);
1561 0 : if (pszURLHash == pszURLHashCached &&
1562 : nFileOffsetStart == nFileOffsetStartCached)
1563 : {
1564 : if (ENABLE_DEBUG)
1565 0 : CPLDebug("VSICURL", "Got data at offset " CPL_FRMT_GUIB " from disk" , nFileOffsetStart);
1566 0 : if (nSizeCached)
1567 : {
1568 0 : char* pBuffer = (char*) CPLMalloc(nSizeCached);
1569 0 : VSIFReadL(pBuffer, 1, nSizeCached, fp);
1570 0 : AddRegion(pszURL, nFileOffsetStart, nSizeCached, pBuffer);
1571 0 : CPLFree(pBuffer);
1572 : }
1573 : else
1574 : {
1575 0 : AddRegion(pszURL, nFileOffsetStart, 0, NULL);
1576 : }
1577 0 : VSIFCloseL(fp);
1578 0 : return GetRegion(pszURL, nFileOffsetStart);
1579 : }
1580 : else
1581 : {
1582 0 : VSIFSeekL(fp, nSizeCached, SEEK_CUR);
1583 : }
1584 : }
1585 0 : VSIFCloseL(fp);
1586 : }
1587 0 : return NULL;
1588 : }
1589 :
1590 :
1591 : /************************************************************************/
1592 : /* AddRegionToCacheDisk() */
1593 : /************************************************************************/
1594 :
1595 0 : void VSICurlFilesystemHandler::AddRegionToCacheDisk(CachedRegion* psRegion)
1596 : {
1597 0 : VSILFILE* fp = VSIFOpenL(VSICurlGetCacheFileName(), "r+b");
1598 0 : if (fp)
1599 : {
1600 : unsigned long pszURLHashCached;
1601 : vsi_l_offset nFileOffsetStartCached;
1602 : size_t nSizeCached;
1603 0 : while(TRUE)
1604 : {
1605 0 : if (VSIFReadL(&pszURLHashCached, 1, sizeof(unsigned long), fp) == 0)
1606 : break;
1607 0 : VSIFReadL(&nFileOffsetStartCached, 1, sizeof(vsi_l_offset), fp);
1608 0 : VSIFReadL(&nSizeCached, 1, sizeof(size_t), fp);
1609 0 : if (psRegion->pszURLHash == pszURLHashCached &&
1610 : psRegion->nFileOffsetStart == nFileOffsetStartCached)
1611 : {
1612 0 : CPLAssert(psRegion->nSize == nSizeCached);
1613 0 : VSIFCloseL(fp);
1614 0 : return;
1615 : }
1616 : else
1617 : {
1618 0 : VSIFSeekL(fp, nSizeCached, SEEK_CUR);
1619 : }
1620 : }
1621 : }
1622 : else
1623 : {
1624 0 : fp = VSIFOpenL(VSICurlGetCacheFileName(), "wb");
1625 : }
1626 0 : if (fp)
1627 : {
1628 : if (ENABLE_DEBUG)
1629 0 : CPLDebug("VSICURL", "Write data at offset " CPL_FRMT_GUIB " to disk" , psRegion->nFileOffsetStart);
1630 0 : VSIFWriteL(&psRegion->pszURLHash, 1, sizeof(unsigned long), fp);
1631 0 : VSIFWriteL(&psRegion->nFileOffsetStart, 1, sizeof(vsi_l_offset), fp);
1632 0 : VSIFWriteL(&psRegion->nSize, 1, sizeof(size_t), fp);
1633 0 : if (psRegion->nSize)
1634 0 : VSIFWriteL(psRegion->pData, 1, psRegion->nSize, fp);
1635 :
1636 0 : VSIFCloseL(fp);
1637 : }
1638 0 : return;
1639 : }
1640 :
1641 :
1642 : /************************************************************************/
1643 : /* GetRegion() */
1644 : /************************************************************************/
1645 :
1646 6176 : const CachedRegion* VSICurlFilesystemHandler::GetRegion(const char* pszURL,
1647 : vsi_l_offset nFileOffsetStart)
1648 : {
1649 6176 : CPLMutexHolder oHolder( &hMutex );
1650 :
1651 6176 : unsigned long pszURLHash = CPLHashSetHashStr(pszURL);
1652 :
1653 6176 : nFileOffsetStart = (nFileOffsetStart / DOWNLOAD_CHUNCK_SIZE) * DOWNLOAD_CHUNCK_SIZE;
1654 : int i;
1655 7143 : for(i=0;i<nRegions;i++)
1656 : {
1657 7098 : CachedRegion* psRegion = papsRegions[i];
1658 7098 : if (psRegion->pszURLHash == pszURLHash &&
1659 : nFileOffsetStart == psRegion->nFileOffsetStart)
1660 : {
1661 6131 : memmove(papsRegions + 1, papsRegions, i * sizeof(CachedRegion*));
1662 6131 : papsRegions[0] = psRegion;
1663 6131 : return psRegion;
1664 : }
1665 : }
1666 45 : if (bUseCacheDisk)
1667 0 : return GetRegionFromCacheDisk(pszURL, nFileOffsetStart);
1668 45 : return NULL;
1669 : }
1670 :
1671 : /************************************************************************/
1672 : /* AddRegion() */
1673 : /************************************************************************/
1674 :
1675 44 : void VSICurlFilesystemHandler::AddRegion(const char* pszURL,
1676 : vsi_l_offset nFileOffsetStart,
1677 : size_t nSize,
1678 : const char *pData)
1679 : {
1680 44 : CPLMutexHolder oHolder( &hMutex );
1681 :
1682 44 : unsigned long pszURLHash = CPLHashSetHashStr(pszURL);
1683 :
1684 : CachedRegion* psRegion;
1685 44 : if (nRegions == N_MAX_REGIONS)
1686 : {
1687 0 : psRegion = papsRegions[N_MAX_REGIONS-1];
1688 0 : memmove(papsRegions + 1, papsRegions, (N_MAX_REGIONS-1) * sizeof(CachedRegion*));
1689 0 : papsRegions[0] = psRegion;
1690 0 : CPLFree(psRegion->pData);
1691 : }
1692 : else
1693 : {
1694 44 : papsRegions = (CachedRegion**) CPLRealloc(papsRegions, (nRegions + 1) * sizeof(CachedRegion*));
1695 44 : if (nRegions)
1696 42 : memmove(papsRegions + 1, papsRegions, nRegions * sizeof(CachedRegion*));
1697 44 : nRegions ++;
1698 44 : papsRegions[0] = psRegion = (CachedRegion*) CPLMalloc(sizeof(CachedRegion));
1699 : }
1700 :
1701 44 : psRegion->pszURLHash = pszURLHash;
1702 44 : psRegion->nFileOffsetStart = nFileOffsetStart;
1703 44 : psRegion->nSize = nSize;
1704 44 : psRegion->pData = (nSize) ? (char*) CPLMalloc(nSize) : NULL;
1705 44 : if (nSize)
1706 44 : memcpy(psRegion->pData, pData, nSize);
1707 :
1708 44 : if (bUseCacheDisk)
1709 0 : AddRegionToCacheDisk(psRegion);
1710 44 : }
1711 :
1712 : /************************************************************************/
1713 : /* GetCachedFileProp() */
1714 : /************************************************************************/
1715 :
1716 3074 : CachedFileProp* VSICurlFilesystemHandler::GetCachedFileProp(const char* pszURL)
1717 : {
1718 3074 : CPLMutexHolder oHolder( &hMutex );
1719 :
1720 3074 : CachedFileProp* cachedFileProp = cacheFileSize[pszURL];
1721 3074 : if (cachedFileProp == NULL)
1722 : {
1723 585 : cachedFileProp = (CachedFileProp*) CPLMalloc(sizeof(CachedFileProp));
1724 585 : cachedFileProp->eExists = EXIST_UNKNOWN;
1725 585 : cachedFileProp->bHastComputedFileSize = FALSE;
1726 585 : cachedFileProp->fileSize = 0;
1727 585 : cachedFileProp->bIsDirectory = FALSE;
1728 585 : cacheFileSize[pszURL] = cachedFileProp;
1729 : }
1730 :
1731 3074 : return cachedFileProp;
1732 : }
1733 :
1734 : /************************************************************************/
1735 : /* Open() */
1736 : /************************************************************************/
1737 :
1738 88 : VSIVirtualHandle* VSICurlFilesystemHandler::Open( const char *pszFilename,
1739 : const char *pszAccess)
1740 : {
1741 88 : if (strchr(pszAccess, 'w') != NULL ||
1742 : strchr(pszAccess, '+') != NULL)
1743 : {
1744 : CPLError(CE_Failure, CPLE_AppDefined,
1745 0 : "Only read-only mode is supported for /vsicurl");
1746 0 : return NULL;
1747 : }
1748 :
1749 : const char* pszOptionVal =
1750 88 : CPLGetConfigOption( "GDAL_DISABLE_READDIR_ON_OPEN", "NO" );
1751 : int bSkipReadDir = EQUAL(pszOptionVal, "EMPTY_DIR") ||
1752 88 : CSLTestBoolean(pszOptionVal);
1753 :
1754 88 : CPLString osFilename(pszFilename);
1755 88 : int bGotFileList = TRUE;
1756 88 : if (strchr(CPLGetFilename(osFilename), '.') != NULL &&
1757 : strncmp(CPLGetExtension(osFilename), "zip", 3) != 0 && !bSkipReadDir)
1758 : {
1759 58 : char** papszFileList = ReadDir(CPLGetDirname(osFilename), &bGotFileList);
1760 58 : int bFound = (VSICurlIsFileInList(papszFileList, CPLGetFilename(osFilename)) != -1);
1761 58 : CSLDestroy(papszFileList);
1762 58 : if (bGotFileList && !bFound)
1763 : {
1764 12 : return NULL;
1765 : }
1766 : }
1767 :
1768 76 : VSICurlHandle* poHandle = new VSICurlHandle( this, osFilename + strlen("/vsicurl/"));
1769 76 : if (!bGotFileList)
1770 : {
1771 : /* If we didn't get a filelist, check that the file really exists */
1772 17 : if (!poHandle->Exists())
1773 : {
1774 2 : delete poHandle;
1775 2 : poHandle = NULL;
1776 : }
1777 : }
1778 76 : return poHandle;
1779 : }
1780 :
1781 : /************************************************************************/
1782 : /* VSICurlParserFindEOL() */
1783 : /* */
1784 : /* Small helper function for VSICurlPaseHTMLFileList() to find */
1785 : /* the end of a line in the directory listing. Either a <br> */
1786 : /* or newline. */
1787 : /************************************************************************/
1788 :
1789 718 : static char *VSICurlParserFindEOL( char *pszData )
1790 :
1791 : {
1792 40118 : while( *pszData != '\0' && *pszData != '\n' && !EQUALN(pszData,"<br>",4) )
1793 38682 : pszData++;
1794 :
1795 718 : if( *pszData == '\0' )
1796 16 : return NULL;
1797 : else
1798 702 : return pszData;
1799 : }
1800 :
1801 :
1802 : /************************************************************************/
1803 : /* VSICurlParseHTMLDateTimeFileSize() */
1804 : /************************************************************************/
1805 :
/* English month names, indexed from 0 (January) to 11 (December). */
static const char* const apszMonths[] = {
    "January",
    "February",
    "March",
    "April",
    "May",
    "June",
    "July",
    "August",
    "September",
    "October",
    "November",
    "December"
};
1810 :
1811 585 : static int VSICurlParseHTMLDateTimeFileSize(const char* pszStr,
1812 : struct tm& brokendowntime,
1813 : GUIntBig& nFileSize,
1814 : GIntBig& mTime)
1815 : {
1816 : int iMonth;
1817 7521 : for(iMonth=0;iMonth<12;iMonth++)
1818 : {
1819 : char szMonth[32];
1820 6946 : szMonth[0] = '-';
1821 6946 : memcpy(szMonth + 1, apszMonths[iMonth], 3);
1822 6946 : szMonth[4] = '-';
1823 6946 : szMonth[5] = '\0';
1824 6946 : const char* pszMonthFound = strstr(pszStr, szMonth);
1825 6946 : if (pszMonthFound)
1826 : {
1827 : /* Format of Apache, like in http://download.osgeo.org/gdal/data/gtiff/ */
1828 : /* "17-May-2010 12:26" */
1829 30 : if (pszMonthFound - pszStr > 2 && strlen(pszMonthFound) > 15 &&
1830 20 : pszMonthFound[-2 + 11] == ' ' && pszMonthFound[-2 + 14] == ':')
1831 : {
1832 10 : pszMonthFound -= 2;
1833 10 : int nDay = atoi(pszMonthFound);
1834 10 : int nYear = atoi(pszMonthFound + 7);
1835 10 : int nHour = atoi(pszMonthFound + 12);
1836 10 : int nMin = atoi(pszMonthFound + 15);
1837 10 : if (nDay >= 1 && nDay <= 31 && nYear >= 1900 &&
1838 : nHour >= 0 && nHour <= 24 && nMin >= 0 && nMin < 60)
1839 : {
1840 10 : brokendowntime.tm_year = nYear - 1900;
1841 10 : brokendowntime.tm_mon = iMonth;
1842 10 : brokendowntime.tm_mday = nDay;
1843 10 : brokendowntime.tm_hour = nHour;
1844 10 : brokendowntime.tm_min = nMin;
1845 10 : mTime = CPLYMDHMSToUnixTime(&brokendowntime);
1846 :
1847 10 : return TRUE;
1848 : }
1849 : }
1850 0 : return FALSE;
1851 : }
1852 :
1853 : /* Microsoft IIS */
1854 6936 : szMonth[0] = ' ';
1855 6936 : strcpy(szMonth + 1, apszMonths[iMonth]);
1856 6936 : strcat(szMonth, " ");
1857 6936 : pszMonthFound = strstr(pszStr, szMonth);
1858 6936 : if (pszMonthFound)
1859 : {
1860 0 : int nLenMonth = strlen(apszMonths[iMonth]);
1861 0 : if (pszMonthFound - pszStr > 2 &&
1862 0 : pszMonthFound[-1] != ',' &&
1863 0 : pszMonthFound[-2] != ' ' &&
1864 : (int)strlen(pszMonthFound-2) > 2 + 1 + nLenMonth + 1 + 4 + 1 + 5 + 1 + 4)
1865 : {
1866 : /* Format of http://ortho.linz.govt.nz/tifs/1994_95/ */
1867 : /* " Friday, 21 April 2006 12:05 p.m. 48062343 m35a_fy_94_95.tif" */
1868 0 : pszMonthFound -= 2;
1869 0 : int nDay = atoi(pszMonthFound);
1870 0 : int nCurOffset = 2 + 1 + nLenMonth + 1;
1871 0 : int nYear = atoi(pszMonthFound + nCurOffset);
1872 0 : nCurOffset += 4 + 1;
1873 0 : int nHour = atoi(pszMonthFound + nCurOffset);
1874 0 : if (nHour < 10)
1875 0 : nCurOffset += 1 + 1;
1876 : else
1877 0 : nCurOffset += 2 + 1;
1878 0 : int nMin = atoi(pszMonthFound + nCurOffset);
1879 0 : nCurOffset += 2 + 1;
1880 0 : if (strncmp(pszMonthFound + nCurOffset, "p.m.", 4) == 0)
1881 0 : nHour += 12;
1882 0 : else if (strncmp(pszMonthFound + nCurOffset, "a.m.", 4) != 0)
1883 0 : nHour = -1;
1884 0 : nCurOffset += 4;
1885 :
1886 0 : const char* pszFilesize = pszMonthFound + nCurOffset;
1887 0 : while(*pszFilesize == ' ')
1888 0 : pszFilesize ++;
1889 0 : if (*pszFilesize >= '1' && *pszFilesize <= '9')
1890 0 : nFileSize = CPLScanUIntBig(pszFilesize, strlen(pszFilesize));
1891 :
1892 0 : if (nDay >= 1 && nDay <= 31 && nYear >= 1900 &&
1893 : nHour >= 0 && nHour <= 24 && nMin >= 0 && nMin < 60)
1894 : {
1895 0 : brokendowntime.tm_year = nYear - 1900;
1896 0 : brokendowntime.tm_mon = iMonth;
1897 0 : brokendowntime.tm_mday = nDay;
1898 0 : brokendowntime.tm_hour = nHour;
1899 0 : brokendowntime.tm_min = nMin;
1900 0 : mTime = CPLYMDHMSToUnixTime(&brokendowntime);
1901 :
1902 0 : return TRUE;
1903 : }
1904 0 : nFileSize = 0;
1905 : }
1906 0 : else if (pszMonthFound - pszStr > 1 &&
1907 0 : pszMonthFound[-1] == ',' &&
1908 : (int)strlen(pszMonthFound) > 1 + nLenMonth + 1 + 2 + 1 + 1 + 4 + 1 + 5 + 1 + 2)
1909 : {
1910 : /* Format of http://publicfiles.dep.state.fl.us/dear/BWR_GIS/2007NWFLULC/ */
1911 : /* " Sunday, June 20, 2010 6:46 PM 233170905 NWF2007LULCForSDE.zip" */
1912 0 : pszMonthFound += 1;
1913 0 : int nCurOffset = nLenMonth + 1;
1914 0 : int nDay = atoi(pszMonthFound + nCurOffset);
1915 0 : nCurOffset += 2 + 1 + 1;
1916 0 : int nYear = atoi(pszMonthFound + nCurOffset);
1917 0 : nCurOffset += 4 + 1;
1918 0 : int nHour = atoi(pszMonthFound + nCurOffset);
1919 0 : nCurOffset += 2 + 1;
1920 0 : int nMin = atoi(pszMonthFound + nCurOffset);
1921 0 : nCurOffset += 2 + 1;
1922 0 : if (strncmp(pszMonthFound + nCurOffset, "PM", 2) == 0)
1923 0 : nHour += 12;
1924 0 : else if (strncmp(pszMonthFound + nCurOffset, "AM", 2) != 0)
1925 0 : nHour = -1;
1926 0 : nCurOffset += 2;
1927 :
1928 0 : const char* pszFilesize = pszMonthFound + nCurOffset;
1929 0 : while(*pszFilesize == ' ')
1930 0 : pszFilesize ++;
1931 0 : if (*pszFilesize >= '1' && *pszFilesize <= '9')
1932 0 : nFileSize = CPLScanUIntBig(pszFilesize, strlen(pszFilesize));
1933 :
1934 0 : if (nDay >= 1 && nDay <= 31 && nYear >= 1900 &&
1935 : nHour >= 0 && nHour <= 24 && nMin >= 0 && nMin < 60)
1936 : {
1937 0 : brokendowntime.tm_year = nYear - 1900;
1938 0 : brokendowntime.tm_mon = iMonth;
1939 0 : brokendowntime.tm_mday = nDay;
1940 0 : brokendowntime.tm_hour = nHour;
1941 0 : brokendowntime.tm_min = nMin;
1942 0 : mTime = CPLYMDHMSToUnixTime(&brokendowntime);
1943 :
1944 0 : return TRUE;
1945 : }
1946 0 : nFileSize = 0;
1947 : }
1948 0 : return FALSE;
1949 : }
1950 : }
1951 :
1952 575 : return FALSE;
1953 : }
1954 :
1955 : /************************************************************************/
1956 : /* ParseHTMLFileList() */
1957 : /* */
1958 : /* Parse a file list document and return all the components. */
1959 : /************************************************************************/
1960 :
1961 16 : char** VSICurlFilesystemHandler::ParseHTMLFileList(const char* pszFilename,
1962 : char* pszData,
1963 : int* pbGotFileList)
1964 : {
1965 16 : CPLStringList oFileList;
1966 16 : char* pszLine = pszData;
1967 : char* c;
1968 16 : int nCount = 0;
1969 16 : int bIsHTMLDirList = FALSE;
1970 16 : CPLString osExpectedString;
1971 16 : CPLString osExpectedString2;
1972 16 : CPLString osExpectedString3;
1973 16 : CPLString osExpectedString4;
1974 16 : CPLString osExpectedString_unescaped;
1975 :
1976 16 : *pbGotFileList = FALSE;
1977 :
1978 : const char* pszDir;
1979 16 : if (EQUALN(pszFilename, "/vsicurl/http://", strlen("/vsicurl/http://")))
1980 16 : pszDir = strchr(pszFilename + strlen("/vsicurl/http://"), '/');
1981 0 : else if (EQUALN(pszFilename, "/vsicurl/https://", strlen("/vsicurl/https://")))
1982 0 : pszDir = strchr(pszFilename + strlen("/vsicurl/https://"), '/');
1983 : else
1984 0 : pszDir = strchr(pszFilename + strlen("/vsicurl/ftp://"), '/');
1985 16 : if (pszDir == NULL)
1986 0 : pszDir = "";
1987 : /* Apache */
1988 16 : osExpectedString = "<title>Index of ";
1989 16 : osExpectedString += pszDir;
1990 16 : osExpectedString += "</title>";
1991 : /* shttpd */
1992 16 : osExpectedString2 = "<title>Index of ";
1993 16 : osExpectedString2 += pszDir;
1994 16 : osExpectedString2 += "/</title>";
1995 : /* FTP */
1996 16 : osExpectedString3 = "FTP Listing of ";
1997 16 : osExpectedString3 += pszDir;
1998 16 : osExpectedString3 += "/";
1999 : /* Apache 1.3.33 */
2000 16 : osExpectedString4 = "<TITLE>Index of ";
2001 16 : osExpectedString4 += pszDir;
2002 16 : osExpectedString4 += "</TITLE>";
2003 :
2004 : /* The listing of http://dds.cr.usgs.gov/srtm/SRTM_image_sample/picture%20examples/ */
2005 : /* has "<title>Index of /srtm/SRTM_image_sample/picture examples</title>" so we must */
2006 : /* try unescaped %20 also */
2007 : /* Similar with http://datalib.usask.ca/gis/Data/Central_America_goodbutdoweown%3f/ */
2008 16 : if (strchr(pszDir, '%'))
2009 : {
2010 0 : char* pszUnescapedDir = CPLUnescapeString(pszDir, NULL, CPLES_URL);
2011 0 : osExpectedString_unescaped = "<title>Index of ";
2012 0 : osExpectedString_unescaped += pszUnescapedDir;
2013 0 : osExpectedString_unescaped += "</title>";
2014 0 : CPLFree(pszUnescapedDir);
2015 : }
2016 :
2017 16 : int nCountTable = 0;
2018 :
2019 734 : while( (c = VSICurlParserFindEOL( pszLine )) != NULL )
2020 : {
2021 702 : *c = 0;
2022 :
2023 : /* To avoid false positive on pages such as http://www.ngs.noaa.gov/PC_PROD/USGG2009BETA */
2024 : /* This is a heuristics, but normal HTML listing of files have not more than one table */
2025 702 : if (strstr(pszLine, "<table"))
2026 : {
2027 3 : nCountTable ++;
2028 3 : if (nCountTable == 2)
2029 : {
2030 0 : *pbGotFileList = FALSE;
2031 0 : return NULL;
2032 : }
2033 : }
2034 :
2035 702 : if (!bIsHTMLDirList &&
2036 : (strstr(pszLine, osExpectedString.c_str()) ||
2037 : strstr(pszLine, osExpectedString2.c_str()) ||
2038 : strstr(pszLine, osExpectedString3.c_str()) ||
2039 : strstr(pszLine, osExpectedString4.c_str()) ||
2040 : (osExpectedString_unescaped.size() != 0 && strstr(pszLine, osExpectedString_unescaped.c_str()))))
2041 : {
2042 3 : bIsHTMLDirList = TRUE;
2043 3 : *pbGotFileList = TRUE;
2044 : }
2045 : /* Subversion HTTP listing */
2046 : /* or Microsoft-IIS/6.0 listing (e.g. http://ortho.linz.govt.nz/tifs/2005_06/) */
2047 712 : else if (!bIsHTMLDirList && strstr(pszLine, "<title>"))
2048 : {
2049 : /* Detect something like : <html><head><title>gdal - Revision 20739: /trunk/autotest/gcore/data</title></head> */
2050 : /* The annoying thing is that what is after ': ' is a subpart of what is after http://server/ */
2051 13 : char* pszSubDir = strstr(pszLine, ": ");
2052 13 : if (pszSubDir == NULL)
2053 : /* or <title>ortho.linz.govt.nz - /tifs/2005_06/</title> */
2054 0 : pszSubDir = strstr(pszLine, "- ");
2055 13 : if (pszSubDir)
2056 : {
2057 13 : pszSubDir += 2;
2058 13 : char* pszTmp = strstr(pszSubDir, "</title>");
2059 13 : if (pszTmp)
2060 : {
2061 13 : if (pszTmp[-1] == '/')
2062 0 : pszTmp[-1] = 0;
2063 : else
2064 13 : *pszTmp = 0;
2065 13 : if (strstr(pszDir, pszSubDir))
2066 : {
2067 13 : bIsHTMLDirList = TRUE;
2068 13 : *pbGotFileList = TRUE;
2069 : }
2070 : }
2071 : }
2072 : }
2073 686 : else if (bIsHTMLDirList &&
2074 : (strstr(pszLine, "<a href=\"") != NULL || strstr(pszLine, "<A HREF=\"") != NULL) &&
2075 : strstr(pszLine, "<a href=\"http://") == NULL && /* exclude absolute links, like to subversion home */
2076 : strstr(pszLine, "Parent Directory") == NULL /* exclude parent directory */)
2077 : {
2078 588 : char *beginFilename = strstr(pszLine, "<a href=\"");
2079 588 : if (beginFilename == NULL)
2080 0 : beginFilename = strstr(pszLine, "<A HREF=\"");
2081 588 : beginFilename += strlen("<a href=\"");
2082 588 : char *endQuote = strchr(beginFilename, '"');
2083 588 : if (endQuote && strncmp(beginFilename, "?C=", 3) != 0 && strncmp(beginFilename, "?N=", 3) != 0)
2084 : {
2085 : struct tm brokendowntime;
2086 585 : memset(&brokendowntime, 0, sizeof(brokendowntime));
2087 585 : GUIntBig nFileSize = 0;
2088 585 : GIntBig mTime = 0;
2089 :
2090 : VSICurlParseHTMLDateTimeFileSize(pszLine,
2091 : brokendowntime,
2092 : nFileSize,
2093 585 : mTime);
2094 :
2095 585 : *endQuote = '\0';
2096 :
2097 : /* Remove trailing slash, that are returned for directories by */
2098 : /* Apache */
2099 585 : int bIsDirectory = FALSE;
2100 585 : if (endQuote[-1] == '/')
2101 : {
2102 24 : bIsDirectory = TRUE;
2103 24 : endQuote[-1] = 0;
2104 : }
2105 :
2106 : /* shttpd links include slashes from the root directory. Skip them */
2107 1170 : while(strchr(beginFilename, '/'))
2108 0 : beginFilename = strchr(beginFilename, '/') + 1;
2109 :
2110 585 : if (strcmp(beginFilename, ".") != 0 &&
2111 : strcmp(beginFilename, "..") != 0)
2112 : {
2113 : CPLString osCachedFilename =
2114 572 : CPLSPrintf("%s/%s", pszFilename + strlen("/vsicurl/"), beginFilename);
2115 572 : CachedFileProp* cachedFileProp = GetCachedFileProp(osCachedFilename);
2116 572 : cachedFileProp->eExists = EXIST_YES;
2117 572 : cachedFileProp->bIsDirectory = bIsDirectory;
2118 572 : cachedFileProp->mTime = mTime;
2119 572 : cachedFileProp->bHastComputedFileSize = nFileSize > 0;
2120 572 : cachedFileProp->fileSize = nFileSize;
2121 :
2122 572 : oFileList.AddString( beginFilename );
2123 : if (ENABLE_DEBUG)
2124 : CPLDebug("VSICURL", "File[%d] = %s, is_dir = %d, size = " CPL_FRMT_GUIB ", time = %04d/%02d/%02d %02d:%02d:%02d",
2125 : nCount, beginFilename, bIsDirectory, nFileSize,
2126 : brokendowntime.tm_year + 1900, brokendowntime.tm_mon + 1, brokendowntime.tm_mday,
2127 572 : brokendowntime.tm_hour, brokendowntime.tm_min, brokendowntime.tm_sec);
2128 572 : nCount ++;
2129 : }
2130 : }
2131 : }
2132 702 : pszLine = c + 1;
2133 : }
2134 :
2135 16 : return oFileList.StealList();
2136 : }
2137 :
2138 :
2139 : /************************************************************************/
2140 : /* VSICurlGetToken() */
2141 : /************************************************************************/
2142 :
/* Cut the next space-delimited token out of pszCurPtr, in place.           */
/*                                                                           */
/* Leading spaces are skipped. The space ending the token is overwritten     */
/* with a NUL character, and *ppszNextToken is set to the first non-space    */
/* character after it. When the token runs up to the end of the string,      */
/* *ppszNextToken is set to NULL.                                            */
/*                                                                           */
/* Returns the token, or NULL when pszCurPtr is NULL or only holds spaces;   */
/* in that case *ppszNextToken is left untouched.                            */
static char* VSICurlGetToken(char* pszCurPtr, char** ppszNextToken)
{
    if( pszCurPtr == NULL )
        return NULL;

    char* pszTokenStart = pszCurPtr;
    for( ; *pszTokenStart == ' '; ++pszTokenStart )
    {
    }
    if( *pszTokenStart == '\0' )
        return NULL;

    char* pszTokenEnd = pszTokenStart;
    for( ; *pszTokenEnd != '\0' && *pszTokenEnd != ' '; ++pszTokenEnd )
    {
    }

    if( *pszTokenEnd == '\0' )
    {
        /* Last token of the string */
        *ppszNextToken = NULL;
        return pszTokenStart;
    }

    *pszTokenEnd = '\0';

    char* pszFollowing = pszTokenEnd + 1;
    for( ; *pszFollowing == ' '; ++pszFollowing )
    {
    }
    *ppszNextToken = pszFollowing;

    return pszTokenStart;
}
2169 :
2170 : /************************************************************************/
2171 : /* VSICurlParseFullFTPLine() */
2172 : /************************************************************************/
2173 :
2174 : /* Parse lines like the following ones :
2175 : -rw-r--r-- 1 10003 100 430 Jul 04 2008 COPYING
2176 : lrwxrwxrwx 1 ftp ftp 28 Jun 14 14:13 MPlayer -> mirrors/mplayerhq.hu/MPlayer
2177 : -rw-r--r-- 1 ftp ftp 725614592 May 13 20:13 Fedora-15-x86_64-Live-KDE.iso
2178 : drwxr-xr-x 280 1003 1003 6656 Aug 26 04:17 gnu
2179 : */
2180 :
2181 2 : static int VSICurlParseFullFTPLine(char* pszLine,
2182 : char*& pszFilename,
2183 : int& bSizeValid,
2184 : GUIntBig& nSize,
2185 : int& bIsDirectory,
2186 : GIntBig& nUnixTime)
2187 : {
2188 2 : char* pszNextToken = pszLine;
2189 2 : char* pszPermissions = VSICurlGetToken(pszNextToken, &pszNextToken);
2190 2 : if (pszPermissions == NULL || strlen(pszPermissions) != 10)
2191 0 : return FALSE;
2192 2 : bIsDirectory = (pszPermissions[0] == 'd');
2193 :
2194 : int i;
2195 8 : for(i = 0; i < 3; i++)
2196 : {
2197 6 : if (VSICurlGetToken(pszNextToken, &pszNextToken) == NULL)
2198 0 : return FALSE;
2199 : }
2200 :
2201 2 : char* pszSize = VSICurlGetToken(pszNextToken, &pszNextToken);
2202 2 : if (pszSize == NULL)
2203 0 : return FALSE;
2204 :
2205 2 : if (pszPermissions[0] == '-')
2206 : {
2207 : /* Regular file */
2208 2 : bSizeValid = TRUE;
2209 2 : nSize = CPLScanUIntBig(pszSize, strlen(pszSize));
2210 : }
2211 :
2212 : struct tm brokendowntime;
2213 2 : memset(&brokendowntime, 0, sizeof(brokendowntime));
2214 2 : int bBrokenDownTimeValid = TRUE;
2215 :
2216 2 : char* pszMonth = VSICurlGetToken(pszNextToken, &pszNextToken);
2217 2 : if (pszMonth == NULL || strlen(pszMonth) != 3)
2218 0 : return FALSE;
2219 :
2220 20 : for(i = 0; i < 12; i++)
2221 : {
2222 20 : if (EQUALN(pszMonth, apszMonths[i], 3))
2223 2 : break;
2224 : }
2225 2 : if (i < 12)
2226 2 : brokendowntime.tm_mon = i;
2227 : else
2228 0 : bBrokenDownTimeValid = FALSE;
2229 :
2230 2 : char* pszDay = VSICurlGetToken(pszNextToken, &pszNextToken);
2231 2 : if (pszDay == NULL || (strlen(pszDay) != 1 && strlen(pszDay) != 2))
2232 0 : return FALSE;
2233 2 : int nDay = atoi(pszDay);
2234 4 : if (nDay >= 1 && nDay <= 31)
2235 2 : brokendowntime.tm_mday = nDay;
2236 : else
2237 0 : bBrokenDownTimeValid = FALSE;
2238 :
2239 2 : char* pszHourOrYear = VSICurlGetToken(pszNextToken, &pszNextToken);
2240 2 : if (pszHourOrYear == NULL || (strlen(pszHourOrYear) != 4 && strlen(pszHourOrYear) != 5))
2241 0 : return FALSE;
2242 2 : if (strlen(pszHourOrYear) == 4)
2243 : {
2244 2 : brokendowntime.tm_year = atoi(pszHourOrYear) - 1900;
2245 : }
2246 : else
2247 : {
2248 : time_t sTime;
2249 0 : time(&sTime);
2250 : struct tm currentBrokendowntime;
2251 0 : CPLUnixTimeToYMDHMS((GIntBig)sTime, ¤tBrokendowntime);
2252 0 : brokendowntime.tm_year = currentBrokendowntime.tm_year;
2253 0 : brokendowntime.tm_hour = atoi(pszHourOrYear);
2254 0 : brokendowntime.tm_min = atoi(pszHourOrYear + 3);
2255 : }
2256 :
2257 2 : if (bBrokenDownTimeValid)
2258 2 : nUnixTime = CPLYMDHMSToUnixTime(&brokendowntime);
2259 : else
2260 0 : nUnixTime = 0;
2261 :
2262 2 : if (pszNextToken == NULL)
2263 0 : return FALSE;
2264 :
2265 2 : pszFilename = pszNextToken;
2266 :
2267 2 : char* pszCurPtr = pszFilename;
2268 33 : while( *pszCurPtr != '\0')
2269 : {
2270 : /* In case of a link, stop before the pointed part of the link */
2271 29 : if (pszPermissions[0] == 'l' && strncmp(pszCurPtr, " -> ", 4) == 0)
2272 : {
2273 0 : break;
2274 : }
2275 29 : pszCurPtr ++;
2276 : }
2277 2 : *pszCurPtr = '\0';
2278 :
2279 2 : return TRUE;
2280 : }
2281 :
2282 : /************************************************************************/
2283 : /* GetFileList() */
2284 : /************************************************************************/
2285 :
2286 18 : char** VSICurlFilesystemHandler::GetFileList(const char *pszDirname, int* pbGotFileList)
2287 : {
2288 : if (ENABLE_DEBUG)
2289 18 : CPLDebug("VSICURL", "GetFileList(%s)" , pszDirname);
2290 :
2291 18 : *pbGotFileList = FALSE;
2292 :
2293 : /* HACK (optimization in fact) for MBTiles driver */
2294 18 : if (strstr(pszDirname, ".tiles.mapbox.com") != NULL)
2295 1 : return NULL;
2296 :
2297 17 : if (strncmp(pszDirname, "/vsicurl/ftp", strlen("/vsicurl/ftp")) == 0)
2298 : {
2299 : WriteFuncStruct sWriteFuncData;
2300 1 : sWriteFuncData.pBuffer = NULL;
2301 :
2302 1 : CPLString osDirname(pszDirname + strlen("/vsicurl/"));
2303 1 : osDirname += '/';
2304 :
2305 1 : char** papszFileList = NULL;
2306 :
2307 1 : for(int iTry=0;iTry<2;iTry++)
2308 : {
2309 1 : CURL* hCurlHandle = GetCurlHandleFor(osDirname);
2310 1 : VSICurlSetOptions(hCurlHandle, osDirname.c_str());
2311 :
2312 : /* On the first pass, we want to try fetching all the possible */
2313 : /* informations (filename, file/directory, size). If that */
2314 : /* does not work, then try again with CURLOPT_DIRLISTONLY set */
2315 1 : if (iTry == 1)
2316 : {
2317 : /* 7.16.4 */
2318 : #if LIBCURL_VERSION_NUM <= 0x071004
2319 : curl_easy_setopt(hCurlHandle, CURLOPT_FTPLISTONLY, 1);
2320 : #elif LIBCURL_VERSION_NUM > 0x071004
2321 0 : curl_easy_setopt(hCurlHandle, CURLOPT_DIRLISTONLY, 1);
2322 : #endif
2323 : }
2324 :
2325 1 : VSICURLInitWriteFuncStruct(&sWriteFuncData, NULL, NULL, NULL);
2326 1 : curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData);
2327 1 : curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION, VSICurlHandleWriteFunc);
2328 :
2329 : char szCurlErrBuf[CURL_ERROR_SIZE+1];
2330 1 : szCurlErrBuf[0] = '\0';
2331 1 : curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER, szCurlErrBuf );
2332 :
2333 1 : curl_easy_perform(hCurlHandle);
2334 :
2335 1 : if (sWriteFuncData.pBuffer == NULL)
2336 0 : return NULL;
2337 :
2338 1 : char* pszLine = sWriteFuncData.pBuffer;
2339 : char* c;
2340 1 : int nCount = 0;
2341 :
2342 1 : if (EQUALN(pszLine, "<!DOCTYPE HTML", strlen("<!DOCTYPE HTML")) ||
2343 : EQUALN(pszLine, "<HTML>", 6))
2344 : {
2345 : papszFileList = ParseHTMLFileList(pszDirname,
2346 : sWriteFuncData.pBuffer,
2347 0 : pbGotFileList);
2348 0 : break;
2349 : }
2350 1 : else if (iTry == 0)
2351 : {
2352 1 : CPLStringList oFileList;
2353 1 : *pbGotFileList = TRUE;
2354 :
2355 4 : while( (c = strchr(pszLine, '\n')) != NULL)
2356 : {
2357 2 : *c = 0;
2358 2 : if (c - pszLine > 0 && c[-1] == '\r')
2359 0 : c[-1] = 0;
2360 :
2361 2 : char* pszFilename = NULL;
2362 2 : int bSizeValid = FALSE;
2363 2 : GUIntBig nFileSize = 0;
2364 2 : int bIsDirectory = FALSE;
2365 2 : GIntBig mUnixTime = 0;
2366 2 : if (!VSICurlParseFullFTPLine(pszLine, pszFilename,
2367 : bSizeValid, nFileSize,
2368 : bIsDirectory, mUnixTime))
2369 0 : break;
2370 :
2371 2 : if (strcmp(pszFilename, ".") != 0 &&
2372 : strcmp(pszFilename, "..") != 0)
2373 : {
2374 : CPLString osCachedFilename =
2375 2 : CPLSPrintf("%s/%s", pszDirname + strlen("/vsicurl/"), pszFilename);
2376 2 : CachedFileProp* cachedFileProp = GetCachedFileProp(osCachedFilename);
2377 2 : cachedFileProp->eExists = EXIST_YES;
2378 2 : cachedFileProp->bHastComputedFileSize = bSizeValid;
2379 2 : cachedFileProp->fileSize = nFileSize;
2380 2 : cachedFileProp->bIsDirectory = bIsDirectory;
2381 2 : cachedFileProp->mTime = mUnixTime;
2382 :
2383 2 : oFileList.AddString(pszFilename);
2384 : if (ENABLE_DEBUG)
2385 : {
2386 : struct tm brokendowntime;
2387 2 : CPLUnixTimeToYMDHMS(mUnixTime, &brokendowntime);
2388 : CPLDebug("VSICURL", "File[%d] = %s, is_dir = %d, size = " CPL_FRMT_GUIB ", time = %04d/%02d/%02d %02d:%02d:%02d",
2389 : nCount, pszFilename, bIsDirectory, nFileSize,
2390 : brokendowntime.tm_year + 1900, brokendowntime.tm_mon + 1, brokendowntime.tm_mday,
2391 2 : brokendowntime.tm_hour, brokendowntime.tm_min, brokendowntime.tm_sec);
2392 : }
2393 :
2394 2 : nCount ++;
2395 : }
2396 :
2397 2 : pszLine = c + 1;
2398 : }
2399 :
2400 1 : if (c == NULL)
2401 : {
2402 1 : papszFileList = oFileList.StealList();
2403 : break;
2404 0 : }
2405 : }
2406 : else
2407 : {
2408 0 : CPLStringList oFileList;
2409 0 : *pbGotFileList = TRUE;
2410 :
2411 0 : while( (c = strchr(pszLine, '\n')) != NULL)
2412 : {
2413 0 : *c = 0;
2414 0 : if (c - pszLine > 0 && c[-1] == '\r')
2415 0 : c[-1] = 0;
2416 :
2417 0 : if (strcmp(pszLine, ".") != 0 &&
2418 : strcmp(pszLine, "..") != 0)
2419 : {
2420 0 : oFileList.AddString(pszLine);
2421 : if (ENABLE_DEBUG)
2422 0 : CPLDebug("VSICURL", "File[%d] = %s", nCount, pszLine);
2423 0 : nCount ++;
2424 : }
2425 :
2426 0 : pszLine = c + 1;
2427 : }
2428 :
2429 0 : papszFileList = oFileList.StealList();
2430 : }
2431 :
2432 0 : CPLFree(sWriteFuncData.pBuffer);
2433 0 : sWriteFuncData.pBuffer = NULL;
2434 : }
2435 :
2436 1 : CPLFree(sWriteFuncData.pBuffer);
2437 :
2438 1 : return papszFileList;
2439 : }
2440 :
2441 : /* Try to recognize HTML pages that list the content of a directory */
2442 : /* Currently this supports what Apache and shttpd can return */
2443 16 : else if (strncmp(pszDirname, "/vsicurl/http://", strlen("/vsicurl/http://")) == 0 ||
2444 : strncmp(pszDirname, "/vsicurl/https://", strlen("/vsicurl/https://")) == 0)
2445 : {
2446 : WriteFuncStruct sWriteFuncData;
2447 :
2448 16 : CPLString osDirname(pszDirname + strlen("/vsicurl/"));
2449 16 : osDirname += '/';
2450 :
2451 : #if LIBCURL_VERSION_NUM < 0x070B00
2452 : /* Curl 7.10.X doesn't manage to unset the CURLOPT_RANGE that would have been */
2453 : /* previously set, so we have to reinit the connection handle */
2454 : GetCurlHandleFor("");
2455 : #endif
2456 :
2457 16 : CURL* hCurlHandle = GetCurlHandleFor(osDirname);
2458 16 : VSICurlSetOptions(hCurlHandle, osDirname.c_str());
2459 :
2460 16 : curl_easy_setopt(hCurlHandle, CURLOPT_RANGE, NULL);
2461 :
2462 16 : VSICURLInitWriteFuncStruct(&sWriteFuncData, NULL, NULL, NULL);
2463 16 : curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData);
2464 16 : curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION, VSICurlHandleWriteFunc);
2465 :
2466 : char szCurlErrBuf[CURL_ERROR_SIZE+1];
2467 16 : szCurlErrBuf[0] = '\0';
2468 16 : curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER, szCurlErrBuf );
2469 :
2470 16 : curl_easy_perform(hCurlHandle);
2471 :
2472 16 : if (sWriteFuncData.pBuffer == NULL)
2473 0 : return NULL;
2474 :
2475 : char** papszFileList = ParseHTMLFileList(pszDirname,
2476 : sWriteFuncData.pBuffer,
2477 16 : pbGotFileList);
2478 :
2479 16 : CPLFree(sWriteFuncData.pBuffer);
2480 16 : return papszFileList;
2481 : }
2482 :
2483 0 : return NULL;
2484 : }
2485 :
2486 : /************************************************************************/
2487 : /* Stat() */
2488 : /************************************************************************/
2489 :
2490 812 : int VSICurlFilesystemHandler::Stat( const char *pszFilename, VSIStatBufL *pStatBuf,
2491 : int nFlags )
2492 : {
2493 812 : CPLString osFilename(pszFilename);
2494 :
2495 812 : memset(pStatBuf, 0, sizeof(VSIStatBufL));
2496 :
2497 : const char* pszOptionVal =
2498 812 : CPLGetConfigOption( "GDAL_DISABLE_READDIR_ON_OPEN", "NO" );
2499 : int bSkipReadDir = EQUAL(pszOptionVal, "EMPTY_DIR") ||
2500 812 : CSLTestBoolean(pszOptionVal);
2501 :
2502 : /* Does it look like a FTP directory ? */
2503 814 : if (strncmp(osFilename, "/vsicurl/ftp", strlen("/vsicurl/ftp")) == 0 &&
2504 2 : pszFilename[strlen(osFilename) - 1] == '/' && !bSkipReadDir)
2505 : {
2506 0 : char** papszFileList = ReadDir(osFilename);
2507 0 : if (papszFileList)
2508 : {
2509 0 : pStatBuf->st_mode = S_IFDIR;
2510 0 : pStatBuf->st_size = 0;
2511 :
2512 0 : CSLDestroy(papszFileList);
2513 :
2514 0 : return 0;
2515 : }
2516 0 : return -1;
2517 : }
2518 812 : else if (strchr(CPLGetFilename(osFilename), '.') != NULL &&
2519 : strncmp(CPLGetExtension(osFilename), "zip", 3) != 0 &&
2520 : !bSkipReadDir)
2521 : {
2522 : int bGotFileList;
2523 756 : char** papszFileList = ReadDir(CPLGetDirname(osFilename), &bGotFileList);
2524 756 : int bFound = (VSICurlIsFileInList(papszFileList, CPLGetFilename(osFilename)) != -1);
2525 756 : CSLDestroy(papszFileList);
2526 756 : if (bGotFileList && !bFound)
2527 : {
2528 14 : return -1;
2529 : }
2530 : }
2531 :
2532 798 : VSICurlHandle oHandle( this, osFilename + strlen("/vsicurl/"));
2533 :
2534 798 : if ( oHandle.IsKnownFileSize() ||
2535 : ((nFlags & VSI_STAT_SIZE_FLAG) && !oHandle.IsDirectory() &&
2536 : CSLTestBoolean(CPLGetConfigOption("CPL_VSIL_CURL_SLOW_GET_SIZE", "YES"))) )
2537 27 : pStatBuf->st_size = oHandle.GetFileSize();
2538 :
2539 798 : int nRet = (oHandle.Exists()) ? 0 : -1;
2540 798 : pStatBuf->st_mtime = oHandle.GetMTime();
2541 798 : pStatBuf->st_mode = oHandle.IsDirectory() ? S_IFDIR : S_IFREG;
2542 798 : return nRet;
2543 : }
2544 :
2545 : /************************************************************************/
2546 : /* Unlink() */
2547 : /************************************************************************/
2548 :
2549 0 : int VSICurlFilesystemHandler::Unlink( const char *pszFilename )
2550 : {
2551 0 : return -1;
2552 : }
2553 :
2554 : /************************************************************************/
2555 : /* Rename() */
2556 : /************************************************************************/
2557 :
2558 0 : int VSICurlFilesystemHandler::Rename( const char *oldpath, const char *newpath )
2559 : {
2560 0 : return -1;
2561 : }
2562 :
2563 : /************************************************************************/
2564 : /* Mkdir() */
2565 : /************************************************************************/
2566 :
2567 0 : int VSICurlFilesystemHandler::Mkdir( const char *pszDirname, long nMode )
2568 : {
2569 0 : return -1;
2570 : }
2571 : /************************************************************************/
2572 : /* Rmdir() */
2573 : /************************************************************************/
2574 :
2575 0 : int VSICurlFilesystemHandler::Rmdir( const char *pszDirname )
2576 : {
2577 0 : return -1;
2578 : }
2579 :
2580 : /************************************************************************/
2581 : /* ReadDir() */
2582 : /************************************************************************/
2583 :
2584 1575 : char** VSICurlFilesystemHandler::ReadDir( const char *pszDirname, int* pbGotFileList )
2585 : {
2586 1575 : CPLString osDirname(pszDirname);
2587 3150 : while (osDirname[strlen(osDirname) - 1] == '/')
2588 0 : osDirname.erase(strlen(osDirname) - 1);
2589 :
2590 1575 : const char* pszUpDir = strstr(osDirname, "/..");
2591 1575 : if (pszUpDir != NULL)
2592 : {
2593 0 : int pos = pszUpDir - osDirname.c_str() - 1;
2594 0 : while(pos >= 0 && osDirname[pos] != '/')
2595 0 : pos --;
2596 0 : if (pos >= 1)
2597 : {
2598 0 : osDirname = osDirname.substr(0, pos) + CPLString(pszUpDir + 3);
2599 : }
2600 : }
2601 :
2602 1575 : CPLMutexHolder oHolder( &hMutex );
2603 :
2604 : /* If we know the file exists and is not a directory, then don't try to list its content */
2605 1575 : CachedFileProp* cachedFileProp = GetCachedFileProp(osDirname.c_str() + strlen("/vsicurl/"));
2606 1575 : if (cachedFileProp->eExists == EXIST_YES && !cachedFileProp->bIsDirectory)
2607 : {
2608 732 : if (pbGotFileList)
2609 0 : *pbGotFileList = TRUE;
2610 732 : return NULL;
2611 : }
2612 :
2613 843 : CachedDirList* psCachedDirList = cacheDirList[osDirname];
2614 843 : if (psCachedDirList == NULL)
2615 : {
2616 18 : psCachedDirList = (CachedDirList*) CPLMalloc(sizeof(CachedDirList));
2617 18 : psCachedDirList->papszFileList = GetFileList(osDirname, &psCachedDirList->bGotFileList);
2618 18 : cacheDirList[osDirname] = psCachedDirList;
2619 : }
2620 :
2621 843 : if (pbGotFileList)
2622 814 : *pbGotFileList = psCachedDirList->bGotFileList;
2623 :
2624 843 : return CSLDuplicate(psCachedDirList->papszFileList);
2625 : }
2626 :
2627 : /************************************************************************/
2628 : /* ReadDir() */
2629 : /************************************************************************/
2630 :
2631 761 : char** VSICurlFilesystemHandler::ReadDir( const char *pszDirname )
2632 : {
2633 761 : return ReadDir(pszDirname, NULL);
2634 : }
2635 :
2636 : /************************************************************************/
2637 : /* VSIInstallCurlFileHandler() */
2638 : /************************************************************************/
2639 :
2640 : /**
2641 : * \brief Install /vsicurl/ HTTP/FTP file system handler (requires libcurl)
2642 : *
2643 : * A special file handler is installed that allows reading on-the-fly of files
2644 : * available through HTTP/FTP web protocols, without downloading the entire file.
2645 : *
2646 : * Recognized filenames are of the form /vsicurl/http://path/to/remote/ressource or
2647 : * /vsicurl/ftp://path/to/remote/ressource where path/to/remote/ressource is the
2648 : * URL of a remote ressource.
2649 : *
2650 : * Partial downloads (requires the HTTP server to support random reading) are done
2651 : * with a 16 KB granularity by default. If the driver detects sequential reading
2652 : * it will progressively increase the chunk size up to 2 MB to improve download
2653 : * performance.
2654 : *
2655 : * The GDAL_HTTP_PROXY and GDAL_HTTP_PROXYUSERPWD configuration options can be
2656 : * used to define a proxy server. The syntax to use is the one of Curl CURLOPT_PROXY
2657 : * and CURLOPT_PROXYUSERPWD options.
2658 : *
2659 : * VSIStatL() will return the size in st_size member and file
2660 : * nature- file or directory - in st_mode member (the later only reliable with FTP
2661 : * resources for now).
2662 : *
2663 : * VSIReadDir() should be able to parse the HTML directory listing returned by the
2664 : * most popular web servers, such as Apache or Microsoft IIS.
2665 : *
2666 : * This special file handler can be combined with other virtual filesystems handlers,
2667 : * such as /vsizip. For example, /vsizip//vsicurl/path/to/remote/file.zip/path/inside/zip
2668 : *
2669 : * @since GDAL 1.8.0
2670 : */
2671 712 : void VSIInstallCurlFileHandler(void)
2672 : {
2673 712 : VSIFileManager::InstallHandler( "/vsicurl/", new VSICurlFilesystemHandler );
2674 712 : }
2675 :
2676 : /************************************************************************/
2677 : /* VSICurlInstallReadCbk() */
2678 : /************************************************************************/
2679 :
2680 1 : int VSICurlInstallReadCbk (VSILFILE* fp,
2681 : VSICurlReadCbkFunc pfnReadCbk,
2682 : void* pfnUserData,
2683 : int bStopOnInterrruptUntilUninstall)
2684 : {
2685 : return ((VSICurlHandle*)fp)->InstallReadCbk(pfnReadCbk, pfnUserData,
2686 1 : bStopOnInterrruptUntilUninstall);
2687 : }
2688 :
2689 :
2690 : /************************************************************************/
2691 : /* VSICurlUninstallReadCbk() */
2692 : /************************************************************************/
2693 :
2694 1 : int VSICurlUninstallReadCbk(VSILFILE* fp)
2695 : {
2696 1 : return ((VSICurlHandle*)fp)->UninstallReadCbk();
2697 : }
2698 :
2699 : #endif /* HAVE_CURL */
|