1 : /******************************************************************************
2 : * $Id: cpl_vsil_curl.cpp 23786 2012-01-22 13:35:42Z rouault $
3 : *
4 : * Project: CPL - Common Portability Library
5 : * Purpose: Implement VSI large file api for HTTP/FTP files
6 : * Author: Even Rouault, even.rouault at mines-paris.org
7 : *
8 : ******************************************************************************
9 : * Copyright (c) 2008, Even Rouault
10 : *
11 : * Permission is hereby granted, free of charge, to any person obtaining a
12 : * copy of this software and associated documentation files (the "Software"),
13 : * to deal in the Software without restriction, including without limitation
14 : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15 : * and/or sell copies of the Software, and to permit persons to whom the
16 : * Software is furnished to do so, subject to the following conditions:
17 : *
18 : * The above copyright notice and this permission notice shall be included
19 : * in all copies or substantial portions of the Software.
20 : *
21 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
22 : * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
24 : * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25 : * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26 : * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27 : * DEALINGS IN THE SOFTWARE.
28 : ****************************************************************************/
29 :
30 : #include "cpl_vsi_virtual.h"
31 : #include "cpl_string.h"
32 : #include "cpl_multiproc.h"
33 : #include "cpl_hash_set.h"
34 : #include "cpl_time.h"
35 : #include "cpl_vsil_curl_priv.h"
36 :
37 : CPL_CVSID("$Id: cpl_vsil_curl.cpp 23786 2012-01-22 13:35:42Z rouault $");
38 :
39 : #ifndef HAVE_CURL
40 :
/**
 * Stand-in used when the library is built without libcurl (HAVE_CURL is
 * not defined): HTTP/FTP virtual file access is not supported, so there
 * is nothing to install.
 */
void VSIInstallCurlFileHandler(void)
{
    /* Intentionally empty: libcurl support was not compiled in. */
    return;
}
45 :
46 : /************************************************************************/
47 : /* VSICurlInstallReadCbk() */
48 : /************************************************************************/
49 :
50 : int VSICurlInstallReadCbk (VSILFILE* fp,
51 : VSICurlReadCbkFunc pfnReadCbk,
52 : void* pfnUserData,
53 : int bStopOnInterrruptUntilUninstall)
54 : {
55 : return FALSE;
56 : }
57 :
58 :
59 : /************************************************************************/
60 : /* VSICurlUninstallReadCbk() */
61 : /************************************************************************/
62 :
63 : int VSICurlUninstallReadCbk(VSILFILE* fp)
64 : {
65 : return FALSE;
66 : }
67 :
68 : #else
69 :
70 : #include <curl/curl.h>
71 :
72 : #include <map>
73 :
/* When non-zero, the `if (ENABLE_DEBUG) CPLDebug("VSICURL", ...)` traces in */
/* this file are emitted. */
#define ENABLE_DEBUG 1

/* NOTE(review): presumably the maximum number of entries kept in the */
/* in-memory region cache -- confirm against AddRegion(). */
#define N_MAX_REGIONS 1000

/* Size in bytes of one download block: range requests are issued in */
/* multiples of this size and cached regions are cut at this granularity. */
#define DOWNLOAD_CHUNCK_SIZE 16384
79 :
/* Tri-state answer to "does the remote resource exist?". */
enum ExistStatus
{
    EXIST_UNKNOWN = -1,   /* not probed yet */
    EXIST_NO      = 0,    /* probed: the resource is absent */
    EXIST_YES     = 1     /* probed: the resource is present */
};
86 :
87 : typedef struct
88 : {
89 : ExistStatus eExists;
90 : int bHastComputedFileSize;
91 : vsi_l_offset fileSize;
92 : int bIsDirectory;
93 : time_t mTime;
94 : } CachedFileProp;
95 :
/* Cached result of a directory listing. */
struct CachedDirList
{
    int     bGotFileList;    /* TRUE if a listing could be obtained */
    char  **papszFileList;   /* only file name without path */
};
101 :
102 : typedef struct
103 : {
104 : unsigned long pszURLHash;
105 : vsi_l_offset nFileOffsetStart;
106 : size_t nSize;
107 : char *pData;
108 : } CachedRegion;
109 :
110 :
/* Returns the fixed name of the on-disk cache file. The returned string */
/* has static storage duration and must not be freed. */
static const char* VSICurlGetCacheFileName()
{
    static const char szCacheFileName[] = "gdal_vsicurl_cache.bin";
    return szCacheFileName;
}
115 :
116 : /************************************************************************/
117 : /* VSICurlFindStringSensitiveExceptEscapeSequences() */
118 : /************************************************************************/
119 :
120 1522 : static int VSICurlFindStringSensitiveExceptEscapeSequences( char ** papszList,
121 : const char * pszTarget )
122 :
123 : {
124 : int i;
125 :
126 1522 : if( papszList == NULL )
127 112 : return -1;
128 :
129 121420 : for( i = 0; papszList[i] != NULL; i++ )
130 : {
131 121370 : const char* pszIter1 = papszList[i];
132 121370 : const char* pszIter2 = pszTarget;
133 : char ch1, ch2;
134 : /* The comparison is case-sensitive, escape for escaped */
135 : /* sequences where letters of the hexadecimal sequence */
136 : /* can be uppercase or lowercase depending on the quoting algorithm */
137 70942 : while(TRUE)
138 : {
139 192312 : ch1 = *pszIter1;
140 192312 : ch2 = *pszIter2;
141 192312 : if (ch1 == '\0' || ch2 == '\0')
142 1378 : break;
143 190934 : if (ch1 == '%' && ch2 == '%' &&
144 0 : pszIter1[1] != '\0' && pszIter1[2] != '\0' &&
145 0 : pszIter2[1] != '\0' && pszIter2[2] != '\0')
146 : {
147 0 : if (!EQUALN(pszIter1+1, pszIter2+1, 2))
148 0 : break;
149 0 : pszIter1 += 2;
150 0 : pszIter2 += 2;
151 : }
152 190934 : if (ch1 != ch2)
153 119992 : break;
154 70942 : pszIter1 ++;
155 70942 : pszIter2 ++;
156 : }
157 121370 : if (ch1 == ch2 && ch1 == '\0')
158 1360 : return i;
159 : }
160 :
161 50 : return -1;
162 : }
163 :
164 : /************************************************************************/
165 : /* VSICurlIsFileInList() */
166 : /************************************************************************/
167 :
168 1466 : static int VSICurlIsFileInList( char ** papszList, const char * pszTarget )
169 : {
170 1466 : int nRet = VSICurlFindStringSensitiveExceptEscapeSequences(papszList, pszTarget);
171 1466 : if (nRet >= 0)
172 1360 : return nRet;
173 :
174 : /* If we didn't find anything, try to URL-escape the target filename */
175 106 : char* pszEscaped = CPLEscapeString(pszTarget, -1, CPLES_URL);
176 106 : if (strcmp(pszTarget, pszEscaped) != 0)
177 : {
178 56 : nRet = VSICurlFindStringSensitiveExceptEscapeSequences(papszList, pszEscaped);
179 : }
180 106 : CPLFree(pszEscaped);
181 106 : return nRet;
182 : }
183 :
184 : /************************************************************************/
185 : /* VSICurlFilesystemHandler */
186 : /************************************************************************/
187 :
188 : typedef struct
189 : {
190 : CPLString osURL;
191 : CURL *hCurlHandle;
192 4 : } CachedConnection;
193 :
194 :
195 : class VSICurlFilesystemHandler : public VSIFilesystemHandler
196 : {
197 : void *hMutex;
198 :
199 : CachedRegion **papsRegions;
200 : int nRegions;
201 :
202 : std::map<CPLString, CachedFileProp*> cacheFileSize;
203 : std::map<CPLString, CachedDirList*> cacheDirList;
204 :
205 : int bUseCacheDisk;
206 :
207 : /* Per-thread Curl connection cache */
208 : std::map<GIntBig, CachedConnection*> mapConnections;
209 :
210 : char** GetFileList(const char *pszFilename, int* pbGotFileList);
211 :
212 : char** ParseHTMLFileList(const char* pszFilename,
213 : char* pszData,
214 : int* pbGotFileList);
215 : public:
216 : VSICurlFilesystemHandler();
217 : ~VSICurlFilesystemHandler();
218 :
219 : virtual VSIVirtualHandle *Open( const char *pszFilename,
220 : const char *pszAccess);
221 : virtual int Stat( const char *pszFilename, VSIStatBufL *pStatBuf, int nFlags );
222 : virtual int Unlink( const char *pszFilename );
223 : virtual int Rename( const char *oldpath, const char *newpath );
224 : virtual int Mkdir( const char *pszDirname, long nMode );
225 : virtual int Rmdir( const char *pszDirname );
226 : virtual char **ReadDir( const char *pszDirname );
227 : virtual char **ReadDir( const char *pszDirname, int* pbGotFileList );
228 :
229 :
230 : const CachedRegion* GetRegion(const char* pszURL,
231 : vsi_l_offset nFileOffsetStart);
232 :
233 : void AddRegion(const char* pszURL,
234 : vsi_l_offset nFileOffsetStart,
235 : size_t nSize,
236 : const char *pData);
237 :
238 : CachedFileProp* GetCachedFileProp(const char* pszURL);
239 :
240 : void AddRegionToCacheDisk(CachedRegion* psRegion);
241 : const CachedRegion* GetRegionFromCacheDisk(const char* pszURL,
242 : vsi_l_offset nFileOffsetStart);
243 :
244 : CURL *GetCurlHandleFor(CPLString osURL);
245 : };
246 :
247 : /************************************************************************/
248 : /* VSICurlHandle */
249 : /************************************************************************/
250 :
251 : class VSICurlHandle : public VSIVirtualHandle
252 : {
253 : private:
254 : VSICurlFilesystemHandler* poFS;
255 :
256 : char* pszURL;
257 : unsigned long pszURLHash;
258 :
259 : vsi_l_offset curOffset;
260 : vsi_l_offset fileSize;
261 : int bHastComputedFileSize;
262 : ExistStatus eExists;
263 : int bIsDirectory;
264 : time_t mTime;
265 :
266 : vsi_l_offset lastDownloadedOffset;
267 : int nBlocksToDownload;
268 : int bEOF;
269 :
270 : int DownloadRegion(vsi_l_offset startOffset, int nBlocks);
271 :
272 : VSICurlReadCbkFunc pfnReadCbk;
273 : void *pReadCbkUserData;
274 : int bStopOnInterrruptUntilUninstall;
275 : int bInterrupted;
276 :
277 : public:
278 :
279 : VSICurlHandle(VSICurlFilesystemHandler* poFS, const char* pszURL);
280 : ~VSICurlHandle();
281 :
282 : virtual int Seek( vsi_l_offset nOffset, int nWhence );
283 : virtual vsi_l_offset Tell();
284 : virtual size_t Read( void *pBuffer, size_t nSize, size_t nMemb );
285 : virtual int ReadMultiRange( int nRanges, void ** ppData,
286 : const vsi_l_offset* panOffsets, const size_t* panSizes );
287 : virtual size_t Write( const void *pBuffer, size_t nSize, size_t nMemb );
288 : virtual int Eof();
289 : virtual int Flush();
290 : virtual int Close();
291 :
292 1456 : int IsKnownFileSize() const { return bHastComputedFileSize; }
293 : vsi_l_offset GetFileSize();
294 : int Exists();
295 1458 : int IsDirectory() const { return bIsDirectory; }
296 1456 : time_t GetMTime() const { return mTime; }
297 :
298 : int InstallReadCbk(VSICurlReadCbkFunc pfnReadCbk,
299 : void* pfnUserData,
300 : int bStopOnInterrruptUntilUninstall);
301 : int UninstallReadCbk();
302 : };
303 :
304 : /************************************************************************/
305 : /* VSICurlHandle() */
306 : /************************************************************************/
307 :
308 1586 : VSICurlHandle::VSICurlHandle(VSICurlFilesystemHandler* poFS, const char* pszURL)
309 : {
310 1586 : this->poFS = poFS;
311 1586 : this->pszURL = CPLStrdup(pszURL);
312 :
313 1586 : curOffset = 0;
314 :
315 1586 : CachedFileProp* cachedFileProp = poFS->GetCachedFileProp(pszURL);
316 1586 : eExists = cachedFileProp->eExists;
317 1586 : fileSize = cachedFileProp->fileSize;
318 1586 : bHastComputedFileSize = cachedFileProp->bHastComputedFileSize;
319 1586 : bIsDirectory = cachedFileProp->bIsDirectory;
320 1586 : mTime = cachedFileProp->mTime;
321 :
322 1586 : lastDownloadedOffset = -1;
323 1586 : nBlocksToDownload = 1;
324 1586 : bEOF = FALSE;
325 :
326 1586 : pfnReadCbk = NULL;
327 1586 : pReadCbkUserData = NULL;
328 1586 : bStopOnInterrruptUntilUninstall = FALSE;
329 1586 : bInterrupted = FALSE;
330 1586 : }
331 :
332 : /************************************************************************/
333 : /* ~VSICurlHandle() */
334 : /************************************************************************/
335 :
336 1586 : VSICurlHandle::~VSICurlHandle()
337 : {
338 1586 : CPLFree(pszURL);
339 1586 : }
340 :
341 : /************************************************************************/
342 : /* InstallReadCbk() */
343 : /************************************************************************/
344 :
345 2 : int VSICurlHandle::InstallReadCbk(VSICurlReadCbkFunc pfnReadCbkIn,
346 : void* pfnUserDataIn,
347 : int bStopOnInterrruptUntilUninstallIn)
348 : {
349 2 : if (pfnReadCbk != NULL)
350 0 : return FALSE;
351 :
352 2 : pfnReadCbk = pfnReadCbkIn;
353 2 : pReadCbkUserData = pfnUserDataIn;
354 2 : bStopOnInterrruptUntilUninstall = bStopOnInterrruptUntilUninstallIn;
355 2 : bInterrupted = FALSE;
356 2 : return TRUE;
357 : }
358 :
359 : /************************************************************************/
360 : /* UninstallReadCbk() */
361 : /************************************************************************/
362 :
363 2 : int VSICurlHandle::UninstallReadCbk()
364 : {
365 2 : if (pfnReadCbk == NULL)
366 0 : return FALSE;
367 :
368 2 : pfnReadCbk = NULL;
369 2 : pReadCbkUserData = NULL;
370 2 : bStopOnInterrruptUntilUninstall = FALSE;
371 2 : bInterrupted = FALSE;
372 2 : return TRUE;
373 : }
374 :
375 : /************************************************************************/
376 : /* Seek() */
377 : /************************************************************************/
378 :
379 1084 : int VSICurlHandle::Seek( vsi_l_offset nOffset, int nWhence )
380 : {
381 1084 : if (nWhence == SEEK_SET)
382 : {
383 664 : curOffset = nOffset;
384 : }
385 420 : else if (nWhence == SEEK_CUR)
386 : {
387 304 : curOffset = curOffset + nOffset;
388 : }
389 : else
390 : {
391 116 : curOffset = GetFileSize() + nOffset;
392 : }
393 1084 : bEOF = FALSE;
394 1084 : return 0;
395 : }
396 :
397 : /************************************************************************/
398 : /* VSICurlSetOptions() */
399 : /************************************************************************/
400 :
401 132 : static void VSICurlSetOptions(CURL* hCurlHandle, const char* pszURL)
402 : {
403 132 : curl_easy_setopt(hCurlHandle, CURLOPT_URL, pszURL);
404 132 : if (CSLTestBoolean(CPLGetConfigOption("CPL_CURL_VERBOSE", "NO")))
405 0 : curl_easy_setopt(hCurlHandle, CURLOPT_VERBOSE, 1);
406 :
407 : /* Set Proxy parameters */
408 132 : const char* pszProxy = CPLGetConfigOption("GDAL_HTTP_PROXY", NULL);
409 132 : if (pszProxy)
410 0 : curl_easy_setopt(hCurlHandle,CURLOPT_PROXY,pszProxy);
411 :
412 132 : const char* pszProxyUserPwd = CPLGetConfigOption("GDAL_HTTP_PROXYUSERPWD", NULL);
413 132 : if (pszProxyUserPwd)
414 0 : curl_easy_setopt(hCurlHandle,CURLOPT_PROXYUSERPWD,pszProxyUserPwd);
415 :
416 : /* Enable following redirections. Requires libcurl 7.10.1 at least */
417 132 : curl_easy_setopt(hCurlHandle, CURLOPT_FOLLOWLOCATION, 1);
418 132 : curl_easy_setopt(hCurlHandle, CURLOPT_MAXREDIRS, 10);
419 :
420 : /* 7.16 */
421 : #if LIBCURL_VERSION_NUM >= 0x071000
422 132 : long option = CURLFTPMETHOD_SINGLECWD;
423 132 : curl_easy_setopt(hCurlHandle, CURLOPT_FTP_FILEMETHOD, option);
424 : #endif
425 :
426 : /* 7.12.3 */
427 : #if LIBCURL_VERSION_NUM > 0x070C03
428 : /* ftp://ftp2.cits.rncan.gc.ca/pub/cantopo/250k_tif/ doesn't like EPSV command */
429 132 : curl_easy_setopt(hCurlHandle, CURLOPT_FTP_USE_EPSV, 0);
430 : #endif
431 :
432 : /* NOSIGNAL should be set to true for timeout to work in multithread
433 : environments on Unix, requires libcurl 7.10 or more recent.
434 : (this force avoiding the use of sgnal handlers) */
435 :
436 : /* 7.10 */
437 : #if LIBCURL_VERSION_NUM >= 0x070A00
438 132 : curl_easy_setopt(hCurlHandle, CURLOPT_NOSIGNAL, 1);
439 : #endif
440 :
441 132 : curl_easy_setopt(hCurlHandle, CURLOPT_NOBODY, 0);
442 132 : curl_easy_setopt(hCurlHandle, CURLOPT_HTTPGET, 1);
443 132 : curl_easy_setopt(hCurlHandle, CURLOPT_HEADER, 0);
444 :
445 : /* 7.16.4 */
446 : #if LIBCURL_VERSION_NUM <= 0x071004
447 : curl_easy_setopt(hCurlHandle, CURLOPT_FTPLISTONLY, 0);
448 : #elif LIBCURL_VERSION_NUM > 0x071004
449 132 : curl_easy_setopt(hCurlHandle, CURLOPT_DIRLISTONLY, 0);
450 : #endif
451 :
452 132 : curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA, NULL);
453 132 : curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION, NULL);
454 132 : }
455 :
456 :
457 : typedef struct
458 : {
459 : char* pBuffer;
460 : size_t nSize;
461 : int bIsHTTP;
462 : int bIsInHeader;
463 : int bMultiRange;
464 : vsi_l_offset nStartOffset;
465 : vsi_l_offset nEndOffset;
466 : int nHTTPCode;
467 : vsi_l_offset nContentLength;
468 : int bFoundContentRange;
469 : int bError;
470 : int bDownloadHeaderOnly;
471 :
472 : VSILFILE *fp;
473 : VSICurlReadCbkFunc pfnReadCbk;
474 : void *pReadCbkUserData;
475 : int bInterrupted;
476 : } WriteFuncStruct;
477 :
478 : /************************************************************************/
479 : /* VSICURLInitWriteFuncStruct() */
480 : /************************************************************************/
481 :
482 232 : static void VSICURLInitWriteFuncStruct(WriteFuncStruct *psStruct,
483 : VSILFILE *fp,
484 : VSICurlReadCbkFunc pfnReadCbk,
485 : void *pReadCbkUserData)
486 : {
487 232 : psStruct->pBuffer = NULL;
488 232 : psStruct->nSize = 0;
489 232 : psStruct->bIsHTTP = FALSE;
490 232 : psStruct->bIsInHeader = TRUE;
491 232 : psStruct->bMultiRange = FALSE;
492 232 : psStruct->nStartOffset = 0;
493 232 : psStruct->nEndOffset = 0;
494 232 : psStruct->nHTTPCode = 0;
495 232 : psStruct->nContentLength = 0;
496 232 : psStruct->bFoundContentRange = FALSE;
497 232 : psStruct->bError = FALSE;
498 232 : psStruct->bDownloadHeaderOnly = FALSE;
499 :
500 232 : psStruct->fp = fp;
501 232 : psStruct->pfnReadCbk = pfnReadCbk;
502 232 : psStruct->pReadCbkUserData = pReadCbkUserData;
503 232 : psStruct->bInterrupted = FALSE;
504 232 : }
505 :
506 : /************************************************************************/
507 : /* VSICurlHandleWriteFunc() */
508 : /************************************************************************/
509 :
510 2966 : static int VSICurlHandleWriteFunc(void *buffer, size_t count, size_t nmemb, void *req)
511 : {
512 2966 : WriteFuncStruct* psStruct = (WriteFuncStruct*) req;
513 2966 : size_t nSize = count * nmemb;
514 :
515 : char* pNewBuffer = (char*) VSIRealloc(psStruct->pBuffer,
516 2966 : psStruct->nSize + nSize + 1);
517 2966 : if (pNewBuffer)
518 : {
519 2966 : psStruct->pBuffer = pNewBuffer;
520 2966 : memcpy(psStruct->pBuffer + psStruct->nSize, buffer, nSize);
521 2966 : psStruct->pBuffer[psStruct->nSize + nSize] = '\0';
522 4066 : if (psStruct->bIsHTTP && psStruct->bIsInHeader)
523 : {
524 1104 : char* pszLine = psStruct->pBuffer + psStruct->nSize;
525 1186 : if (EQUALN(pszLine, "HTTP/1.0 ", 9) ||
526 : EQUALN(pszLine, "HTTP/1.1 ", 9))
527 82 : psStruct->nHTTPCode = atoi(pszLine + 9);
528 1022 : else if (EQUALN(pszLine, "Content-Length: ", 16))
529 : psStruct->nContentLength = CPLScanUIntBig(pszLine + 16,
530 80 : strlen(pszLine + 16));
531 942 : else if (EQUALN(pszLine, "Content-Range: ", 15))
532 12 : psStruct->bFoundContentRange = TRUE;
533 :
534 : /*if (nSize > 2 && pszLine[nSize - 2] == '\r' &&
535 : pszLine[nSize - 1] == '\n')
536 : {
537 : pszLine[nSize - 2] = 0;
538 : CPLDebug("VSICURL", "%s", pszLine);
539 : pszLine[nSize - 2] = '\r';
540 : }*/
541 :
542 1104 : if (pszLine[0] == '\r' || pszLine[0] == '\n')
543 : {
544 82 : if (psStruct->bDownloadHeaderOnly)
545 : {
546 : /* If moved permanently/temporarily, go on. Otherwise stop now*/
547 6 : if (!(psStruct->nHTTPCode == 301 || psStruct->nHTTPCode == 302))
548 4 : return 0;
549 : }
550 : else
551 : {
552 76 : psStruct->bIsInHeader = FALSE;
553 :
554 : /* Detect servers that don't support range downloading */
555 76 : if (psStruct->nHTTPCode == 200 &&
556 : !psStruct->bMultiRange &&
557 : !psStruct->bFoundContentRange &&
558 : (psStruct->nStartOffset != 0 || psStruct->nContentLength > 10 *
559 : (psStruct->nEndOffset - psStruct->nStartOffset + 1)))
560 : {
561 : CPLError(CE_Failure, CPLE_AppDefined,
562 0 : "Range downloading not supported by this server !");
563 0 : psStruct->bError = TRUE;
564 0 : return 0;
565 : }
566 : }
567 : }
568 : }
569 : else
570 : {
571 1862 : if (psStruct->pfnReadCbk)
572 : {
573 12 : if ( ! psStruct->pfnReadCbk(psStruct->fp, buffer, nSize,
574 : psStruct->pReadCbkUserData) )
575 : {
576 2 : psStruct->bInterrupted = TRUE;
577 2 : return 0;
578 : }
579 : }
580 : }
581 2960 : psStruct->nSize += nSize;
582 2960 : return nmemb;
583 : }
584 : else
585 : {
586 0 : return 0;
587 : }
588 : }
589 :
590 :
591 : /************************************************************************/
592 : /* GetFileSize() */
593 : /************************************************************************/
594 :
595 172 : vsi_l_offset VSICurlHandle::GetFileSize()
596 : {
597 : WriteFuncStruct sWriteFuncData;
598 : WriteFuncStruct sWriteFuncHeaderData;
599 :
600 172 : if (bHastComputedFileSize)
601 154 : return fileSize;
602 :
603 18 : bHastComputedFileSize = TRUE;
604 :
605 : /* Consider that only the files whose extension ends up with one that is */
606 : /* listed in CPL_VSIL_CURL_ALLOWED_EXTENSIONS exist on the server */
607 : /* This can speeds up dramatically open experience, in case the server */
608 : /* cannot return a file list */
609 : /* For example : */
610 : /* gdalinfo --config CPL_VSIL_CURL_ALLOWED_EXTENSIONS ".tif" /vsicurl/http://igskmncngs506.cr.usgs.gov/gmted/Global_tiles_GMTED/075darcsec/bln/W030/30N030W_20101117_gmted_bln075.tif */
611 : const char* pszAllowedExtensions =
612 18 : CPLGetConfigOption("CPL_VSIL_CURL_ALLOWED_EXTENSIONS", NULL);
613 18 : if (pszAllowedExtensions)
614 : {
615 2 : char** papszExtensions = CSLTokenizeString2( pszAllowedExtensions, ", ", 0 );
616 2 : int nURLLen = strlen(pszURL);
617 2 : int bFound = FALSE;
618 2 : for(int i=0;papszExtensions[i] != NULL;i++)
619 : {
620 2 : int nExtensionLen = strlen(papszExtensions[i]);
621 4 : if (nURLLen > nExtensionLen &&
622 2 : EQUAL(pszURL + nURLLen - nExtensionLen, papszExtensions[i]))
623 : {
624 2 : bFound = TRUE;
625 2 : break;
626 : }
627 : }
628 :
629 2 : if (!bFound)
630 : {
631 0 : eExists = EXIST_NO;
632 0 : fileSize = 0;
633 :
634 0 : CachedFileProp* cachedFileProp = poFS->GetCachedFileProp(pszURL);
635 0 : cachedFileProp->bHastComputedFileSize = TRUE;
636 0 : cachedFileProp->fileSize = fileSize;
637 0 : cachedFileProp->eExists = eExists;
638 :
639 0 : CSLDestroy(papszExtensions);
640 :
641 0 : return 0;
642 : }
643 :
644 2 : CSLDestroy(papszExtensions);
645 : }
646 :
647 : #if LIBCURL_VERSION_NUM < 0x070B00
648 : /* Curl 7.10.X doesn't manage to unset the CURLOPT_RANGE that would have been */
649 : /* previously set, so we have to reinit the connection handle */
650 : poFS->GetCurlHandleFor("");
651 : #endif
652 18 : CURL* hCurlHandle = poFS->GetCurlHandleFor(pszURL);
653 :
654 18 : VSICurlSetOptions(hCurlHandle, pszURL);
655 :
656 18 : VSICURLInitWriteFuncStruct(&sWriteFuncHeaderData, NULL, NULL, NULL);
657 :
658 : /* HACK for mbtiles driver: proper fix would be to auto-detect servers that don't accept HEAD */
659 : /* http://a.tiles.mapbox.com/v3/ doesn't accept HEAD, so let's start a GET */
660 : /* and interrupt is as soon as the header is found */
661 18 : if (strstr(pszURL, ".tiles.mapbox.com/") != NULL)
662 : {
663 4 : curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA, &sWriteFuncHeaderData);
664 4 : curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION, VSICurlHandleWriteFunc);
665 :
666 4 : sWriteFuncHeaderData.bIsHTTP = strncmp(pszURL, "http", 4) == 0;
667 4 : sWriteFuncHeaderData.bDownloadHeaderOnly = TRUE;
668 : }
669 : else
670 : {
671 14 : curl_easy_setopt(hCurlHandle, CURLOPT_NOBODY, 1);
672 14 : curl_easy_setopt(hCurlHandle, CURLOPT_HTTPGET, 0);
673 14 : curl_easy_setopt(hCurlHandle, CURLOPT_HEADER, 1);
674 : }
675 :
676 : /* We need that otherwise OSGEO4W's libcurl issue a dummy range request */
677 : /* when doing a HEAD when recycling connections */
678 18 : curl_easy_setopt(hCurlHandle, CURLOPT_RANGE, NULL);
679 :
680 : /* Bug with older curl versions (<=7.16.4) and FTP. See http://curl.haxx.se/mail/lib-2007-08/0312.html */
681 18 : VSICURLInitWriteFuncStruct(&sWriteFuncData, NULL, NULL, NULL);
682 18 : curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData);
683 18 : curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION, VSICurlHandleWriteFunc);
684 :
685 : char szCurlErrBuf[CURL_ERROR_SIZE+1];
686 18 : szCurlErrBuf[0] = '\0';
687 18 : curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER, szCurlErrBuf );
688 :
689 18 : double dfSize = 0;
690 18 : curl_easy_perform(hCurlHandle);
691 :
692 18 : eExists = EXIST_UNKNOWN;
693 :
694 18 : if (strncmp(pszURL, "ftp", 3) == 0)
695 : {
696 0 : if (sWriteFuncData.pBuffer != NULL &&
697 : strncmp(sWriteFuncData.pBuffer, "Content-Length: ", strlen( "Content-Length: ")) == 0)
698 : {
699 0 : const char* pszBuffer = sWriteFuncData.pBuffer + strlen("Content-Length: ");
700 0 : eExists = EXIST_YES;
701 0 : fileSize = CPLScanUIntBig(pszBuffer, sWriteFuncData.nSize - strlen("Content-Length: "));
702 : if (ENABLE_DEBUG)
703 : CPLDebug("VSICURL", "GetFileSize(%s)=" CPL_FRMT_GUIB,
704 0 : pszURL, fileSize);
705 : }
706 : }
707 :
708 18 : if (eExists != EXIST_YES)
709 : {
710 18 : CURLcode code = curl_easy_getinfo(hCurlHandle, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &dfSize );
711 18 : if (code == 0)
712 : {
713 18 : eExists = EXIST_YES;
714 18 : if (dfSize < 0)
715 0 : fileSize = 0;
716 : else
717 18 : fileSize = (GUIntBig)dfSize;
718 : }
719 : else
720 : {
721 0 : eExists = EXIST_NO;
722 0 : fileSize = 0;
723 0 : CPLError(CE_Failure, CPLE_AppDefined, "VSICurlHandle::GetFileSize failed");
724 : }
725 :
726 18 : long response_code = 0;
727 18 : curl_easy_getinfo(hCurlHandle, CURLINFO_HTTP_CODE, &response_code);
728 18 : if (response_code != 200)
729 : {
730 2 : eExists = EXIST_NO;
731 2 : fileSize = 0;
732 : }
733 :
734 : /* Try to guess if this is a directory. Generally if this is a directory, */
735 : /* curl will retry with an URL with slash added */
736 18 : char *pszEffectiveURL = NULL;
737 18 : curl_easy_getinfo(hCurlHandle, CURLINFO_EFFECTIVE_URL, &pszEffectiveURL);
738 34 : if (pszEffectiveURL != NULL && strncmp(pszURL, pszEffectiveURL, strlen(pszURL)) == 0 &&
739 16 : pszEffectiveURL[strlen(pszURL)] == '/')
740 : {
741 2 : eExists = EXIST_YES;
742 2 : fileSize = 0;
743 2 : bIsDirectory = TRUE;
744 : }
745 :
746 : if (ENABLE_DEBUG)
747 : CPLDebug("VSICURL", "GetFileSize(%s)=" CPL_FRMT_GUIB " response_code=%d",
748 18 : pszURL, fileSize, (int)response_code);
749 : }
750 :
751 18 : CPLFree(sWriteFuncData.pBuffer);
752 18 : CPLFree(sWriteFuncHeaderData.pBuffer);
753 :
754 18 : CachedFileProp* cachedFileProp = poFS->GetCachedFileProp(pszURL);
755 18 : cachedFileProp->bHastComputedFileSize = TRUE;
756 18 : cachedFileProp->fileSize = fileSize;
757 18 : cachedFileProp->eExists = eExists;
758 18 : cachedFileProp->bIsDirectory = bIsDirectory;
759 :
760 18 : return fileSize;
761 : }
762 :
763 : /************************************************************************/
764 : /* Exists() */
765 : /************************************************************************/
766 :
767 1490 : int VSICurlHandle::Exists()
768 : {
769 1490 : if (eExists == EXIST_UNKNOWN)
770 12 : GetFileSize();
771 1490 : return eExists == EXIST_YES;
772 : }
773 :
774 : /************************************************************************/
775 : /* Tell() */
776 : /************************************************************************/
777 :
778 288 : vsi_l_offset VSICurlHandle::Tell()
779 : {
780 288 : return curOffset;
781 : }
782 :
783 : /************************************************************************/
784 : /* DownloadRegion() */
785 : /************************************************************************/
786 :
787 80 : int VSICurlHandle::DownloadRegion(vsi_l_offset startOffset, int nBlocks)
788 : {
789 : WriteFuncStruct sWriteFuncData;
790 : WriteFuncStruct sWriteFuncHeaderData;
791 :
792 80 : if (bInterrupted && bStopOnInterrruptUntilUninstall)
793 0 : return FALSE;
794 :
795 80 : CachedFileProp* cachedFileProp = poFS->GetCachedFileProp(pszURL);
796 80 : if (cachedFileProp->eExists == EXIST_NO)
797 0 : return FALSE;
798 :
799 80 : CURL* hCurlHandle = poFS->GetCurlHandleFor(pszURL);
800 80 : VSICurlSetOptions(hCurlHandle, pszURL);
801 :
802 80 : VSICURLInitWriteFuncStruct(&sWriteFuncData, (VSILFILE*)this, pfnReadCbk, pReadCbkUserData);
803 80 : curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData);
804 80 : curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION, VSICurlHandleWriteFunc);
805 :
806 80 : VSICURLInitWriteFuncStruct(&sWriteFuncHeaderData, NULL, NULL, NULL);
807 80 : curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA, &sWriteFuncHeaderData);
808 80 : curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION, VSICurlHandleWriteFunc);
809 80 : sWriteFuncHeaderData.bIsHTTP = strncmp(pszURL, "http", 4) == 0;
810 80 : sWriteFuncHeaderData.nStartOffset = startOffset;
811 80 : sWriteFuncHeaderData.nEndOffset = startOffset + nBlocks * DOWNLOAD_CHUNCK_SIZE - 1;
812 :
813 : char rangeStr[512];
814 80 : sprintf(rangeStr, CPL_FRMT_GUIB "-" CPL_FRMT_GUIB, startOffset, startOffset + nBlocks * DOWNLOAD_CHUNCK_SIZE - 1);
815 :
816 : if (ENABLE_DEBUG)
817 80 : CPLDebug("VSICURL", "Downloading %s (%s)...", rangeStr, pszURL);
818 :
819 80 : curl_easy_setopt(hCurlHandle, CURLOPT_RANGE, rangeStr);
820 :
821 : char szCurlErrBuf[CURL_ERROR_SIZE+1];
822 80 : szCurlErrBuf[0] = '\0';
823 80 : curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER, szCurlErrBuf );
824 :
825 80 : curl_easy_perform(hCurlHandle);
826 :
827 80 : curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, NULL);
828 80 : curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION, NULL);
829 80 : curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA, NULL);
830 80 : curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION, NULL);
831 :
832 80 : if (sWriteFuncData.bInterrupted)
833 : {
834 2 : bInterrupted = TRUE;
835 :
836 2 : CPLFree(sWriteFuncData.pBuffer);
837 2 : CPLFree(sWriteFuncHeaderData.pBuffer);
838 :
839 2 : return FALSE;
840 : }
841 :
842 78 : long response_code = 0;
843 78 : curl_easy_getinfo(hCurlHandle, CURLINFO_HTTP_CODE, &response_code);
844 :
845 78 : char *content_type = 0;
846 78 : curl_easy_getinfo(hCurlHandle, CURLINFO_CONTENT_TYPE, &content_type);
847 :
848 : if (ENABLE_DEBUG)
849 78 : CPLDebug("VSICURL", "Got reponse_code=%ld", response_code);
850 :
851 78 : if ((response_code != 200 && response_code != 206 &&
852 : response_code != 225 && response_code != 226 && response_code != 426) || sWriteFuncHeaderData.bError)
853 : {
854 0 : if (response_code >= 400 && szCurlErrBuf[0] != '\0')
855 : {
856 0 : if (strcmp(szCurlErrBuf, "Couldn't use REST") == 0)
857 : CPLError(CE_Failure, CPLE_AppDefined, "%d: %s, %s",
858 : (int)response_code, szCurlErrBuf,
859 0 : "Range downloading not supported by this server !");
860 : else
861 0 : CPLError(CE_Failure, CPLE_AppDefined, "%d: %s", (int)response_code, szCurlErrBuf);
862 : }
863 0 : if (!bHastComputedFileSize && startOffset == 0)
864 : {
865 0 : cachedFileProp->bHastComputedFileSize = bHastComputedFileSize = TRUE;
866 0 : cachedFileProp->fileSize = fileSize = 0;
867 0 : cachedFileProp->eExists = eExists = EXIST_NO;
868 : }
869 0 : CPLFree(sWriteFuncData.pBuffer);
870 0 : CPLFree(sWriteFuncHeaderData.pBuffer);
871 0 : return FALSE;
872 : }
873 :
874 78 : if (!bHastComputedFileSize && sWriteFuncHeaderData.pBuffer)
875 : {
876 : /* Try to retrieve the filesize from the HTTP headers */
877 : /* if in the form : "Content-Range: bytes x-y/filesize" */
878 14 : char* pszContentRange = strstr(sWriteFuncHeaderData.pBuffer, "Content-Range: bytes ");
879 14 : if (pszContentRange)
880 : {
881 4 : char* pszEOL = strchr(pszContentRange, '\n');
882 4 : if (pszEOL)
883 : {
884 4 : *pszEOL = 0;
885 4 : pszEOL = strchr(pszContentRange, '\r');
886 4 : if (pszEOL)
887 4 : *pszEOL = 0;
888 4 : char* pszSlash = strchr(pszContentRange, '/');
889 4 : if (pszSlash)
890 : {
891 4 : pszSlash ++;
892 4 : fileSize = CPLScanUIntBig(pszSlash, strlen(pszSlash));
893 : }
894 : }
895 : }
896 10 : else if (strncmp(pszURL, "ftp", 3) == 0)
897 : {
898 : /* Parse 213 answer for FTP protocol */
899 0 : char* pszSize = strstr(sWriteFuncHeaderData.pBuffer, "213 ");
900 0 : if (pszSize)
901 : {
902 0 : pszSize += 4;
903 0 : char* pszEOL = strchr(pszSize, '\n');
904 0 : if (pszEOL)
905 : {
906 0 : *pszEOL = 0;
907 0 : pszEOL = strchr(pszSize, '\r');
908 0 : if (pszEOL)
909 0 : *pszEOL = 0;
910 :
911 0 : fileSize = CPLScanUIntBig(pszSize, strlen(pszSize));
912 : }
913 : }
914 : }
915 :
916 14 : if (fileSize != 0)
917 : {
918 4 : eExists = EXIST_YES;
919 :
920 : if (ENABLE_DEBUG)
921 : CPLDebug("VSICURL", "GetFileSize(%s)=" CPL_FRMT_GUIB " response_code=%d",
922 4 : pszURL, fileSize, (int)response_code);
923 :
924 4 : bHastComputedFileSize = cachedFileProp->bHastComputedFileSize = TRUE;
925 4 : cachedFileProp->fileSize = fileSize;
926 4 : cachedFileProp->eExists = eExists;
927 : }
928 : }
929 :
930 78 : lastDownloadedOffset = startOffset + nBlocks * DOWNLOAD_CHUNCK_SIZE;
931 :
932 78 : char* pBuffer = sWriteFuncData.pBuffer;
933 78 : int nSize = sWriteFuncData.nSize;
934 :
935 78 : if (nSize > nBlocks * DOWNLOAD_CHUNCK_SIZE)
936 : {
937 : if (ENABLE_DEBUG)
938 : CPLDebug("VSICURL", "Got more data than expected : %d instead of %d",
939 0 : nSize, nBlocks * DOWNLOAD_CHUNCK_SIZE);
940 : }
941 :
942 240 : while(nSize > 0)
943 : {
944 : //if (ENABLE_DEBUG)
945 : // CPLDebug("VSICURL", "Add region %d - %d", startOffset, MIN(DOWNLOAD_CHUNCK_SIZE, nSize));
946 84 : poFS->AddRegion(pszURL, startOffset, MIN(DOWNLOAD_CHUNCK_SIZE, nSize), pBuffer);
947 84 : startOffset += DOWNLOAD_CHUNCK_SIZE;
948 84 : pBuffer += DOWNLOAD_CHUNCK_SIZE;
949 84 : nSize -= DOWNLOAD_CHUNCK_SIZE;
950 : }
951 :
952 78 : CPLFree(sWriteFuncData.pBuffer);
953 78 : CPLFree(sWriteFuncHeaderData.pBuffer);
954 :
955 78 : return TRUE;
956 : }
957 :
958 : /************************************************************************/
959 : /* Read() */
960 : /************************************************************************/
961 :
962 12196 : size_t VSICurlHandle::Read( void *pBuffer, size_t nSize, size_t nMemb )
963 : {
964 12196 : size_t nBufferRequestSize = nSize * nMemb;
965 12196 : if (nBufferRequestSize == 0)
966 0 : return 0;
967 :
968 : //CPLDebug("VSICURL", "offset=%d, size=%d", (int)curOffset, (int)nBufferRequestSize);
969 :
970 12196 : vsi_l_offset iterOffset = curOffset;
971 36568 : while (nBufferRequestSize)
972 : {
973 12200 : const CachedRegion* psRegion = poFS->GetRegion(pszURL, iterOffset);
974 12200 : if (psRegion == NULL)
975 : {
976 : vsi_l_offset nOffsetToDownload =
977 80 : (iterOffset / DOWNLOAD_CHUNCK_SIZE) * DOWNLOAD_CHUNCK_SIZE;
978 :
979 80 : if (nOffsetToDownload == lastDownloadedOffset)
980 : {
981 : /* In case of consecutive reads (of small size), we use a */
982 : /* heuristic that we will read the file sequentially, so */
983 : /* we double the requested size to decrease the number of */
984 : /* client/server roundtrips. */
985 2 : if (nBlocksToDownload < 100)
986 2 : nBlocksToDownload *= 2;
987 : }
988 : else
989 : {
990 : /* Random reads. Cancel the above heuristics */
991 78 : nBlocksToDownload = 1;
992 : }
993 :
994 : /* Ensure that we will request at least the number of blocks */
995 : /* to satisfy the remaining buffer size to read */
996 : vsi_l_offset nEndOffsetToDownload =
997 80 : ((iterOffset + nBufferRequestSize) / DOWNLOAD_CHUNCK_SIZE) * DOWNLOAD_CHUNCK_SIZE;
998 : int nMinBlocksToDownload = 1 + (int)
999 80 : ((nEndOffsetToDownload - nOffsetToDownload) / DOWNLOAD_CHUNCK_SIZE);
1000 80 : if (nBlocksToDownload < nMinBlocksToDownload)
1001 4 : nBlocksToDownload = nMinBlocksToDownload;
1002 :
1003 : int i;
1004 : /* Avoid reading already cached data */
1005 86 : for(i=1;i<nBlocksToDownload;i++)
1006 : {
1007 6 : if (poFS->GetRegion(pszURL, nOffsetToDownload + i * DOWNLOAD_CHUNCK_SIZE) != NULL)
1008 : {
1009 0 : nBlocksToDownload = i;
1010 0 : break;
1011 : }
1012 : }
1013 :
1014 80 : if (DownloadRegion(nOffsetToDownload, nBlocksToDownload) == FALSE)
1015 : {
1016 2 : if (!bInterrupted)
1017 0 : bEOF = TRUE;
1018 2 : return 0;
1019 : }
1020 78 : psRegion = poFS->GetRegion(pszURL, iterOffset);
1021 : }
1022 12198 : if (psRegion == NULL || psRegion->pData == NULL)
1023 : {
1024 0 : bEOF = TRUE;
1025 0 : return 0;
1026 : }
1027 12198 : int nToCopy = (int) MIN(nBufferRequestSize, psRegion->nSize - (iterOffset - psRegion->nFileOffsetStart));
1028 : memcpy(pBuffer, psRegion->pData + iterOffset - psRegion->nFileOffsetStart,
1029 12198 : nToCopy);
1030 12198 : pBuffer = (char*) pBuffer + nToCopy;
1031 12198 : iterOffset += nToCopy;
1032 12198 : nBufferRequestSize -= nToCopy;
1033 12198 : if (psRegion->nSize != DOWNLOAD_CHUNCK_SIZE && nBufferRequestSize != 0)
1034 : {
1035 22 : break;
1036 : }
1037 : }
1038 :
1039 12194 : size_t ret = (size_t) ((iterOffset - curOffset) / nSize);
1040 12194 : if (ret != nMemb)
1041 22 : bEOF = TRUE;
1042 :
1043 12194 : curOffset = iterOffset;
1044 :
1045 12194 : return ret;
1046 : }
1047 :
1048 :
1049 : /************************************************************************/
1050 : /* ReadMultiRange() */
1051 : /************************************************************************/
1052 :
1053 2 : int VSICurlHandle::ReadMultiRange( int nRanges, void ** ppData,
1054 : const vsi_l_offset* panOffsets,
1055 : const size_t* panSizes )
1056 : {
1057 : WriteFuncStruct sWriteFuncData;
1058 : WriteFuncStruct sWriteFuncHeaderData;
1059 :
1060 2 : if (bInterrupted && bStopOnInterrruptUntilUninstall)
1061 0 : return FALSE;
1062 :
1063 2 : CachedFileProp* cachedFileProp = poFS->GetCachedFileProp(pszURL);
1064 2 : if (cachedFileProp->eExists == EXIST_NO)
1065 0 : return -1;
1066 :
1067 2 : CPLString osRanges, osFirstRange, osLastRange;
1068 : int i;
1069 2 : int nMergedRanges = 0;
1070 2 : vsi_l_offset nTotalReqSize = 0;
1071 258 : for(i=0;i<nRanges;i++)
1072 : {
1073 256 : CPLString osCurRange;
1074 256 : if (i != 0)
1075 254 : osRanges.append(",");
1076 256 : osCurRange = CPLSPrintf(CPL_FRMT_GUIB "-", panOffsets[i]);
1077 512 : while (i + 1 < nRanges && panOffsets[i] + panSizes[i] == panOffsets[i+1])
1078 : {
1079 0 : nTotalReqSize += panSizes[i];
1080 0 : i ++;
1081 : }
1082 256 : nTotalReqSize += panSizes[i];
1083 256 : osCurRange.append(CPLSPrintf(CPL_FRMT_GUIB, panOffsets[i] + panSizes[i]-1));
1084 256 : nMergedRanges ++;
1085 :
1086 256 : osRanges += osCurRange;
1087 :
1088 256 : if (nMergedRanges == 1)
1089 2 : osFirstRange = osCurRange;
1090 256 : osLastRange = osCurRange;
1091 : }
1092 :
1093 2 : const char* pszMaxRanges = CPLGetConfigOption("CPL_VSIL_CURL_MAX_RANGES", "250");
1094 2 : int nMaxRanges = atoi(pszMaxRanges);
1095 2 : if (nMaxRanges <= 0)
1096 0 : nMaxRanges = 250;
1097 2 : if (nMergedRanges > nMaxRanges)
1098 : {
1099 0 : int nHalf = nRanges / 2;
1100 0 : int nRet = ReadMultiRange(nHalf, ppData, panOffsets, panSizes);
1101 0 : if (nRet != 0)
1102 0 : return nRet;
1103 0 : return ReadMultiRange(nRanges - nHalf, ppData + nHalf, panOffsets + nHalf, panSizes + nHalf);
1104 : }
1105 :
1106 2 : CURL* hCurlHandle = poFS->GetCurlHandleFor(pszURL);
1107 2 : VSICurlSetOptions(hCurlHandle, pszURL);
1108 :
1109 2 : VSICURLInitWriteFuncStruct(&sWriteFuncData, (VSILFILE*)this, pfnReadCbk, pReadCbkUserData);
1110 2 : curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData);
1111 2 : curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION, VSICurlHandleWriteFunc);
1112 :
1113 2 : VSICURLInitWriteFuncStruct(&sWriteFuncHeaderData, NULL, NULL, NULL);
1114 2 : curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA, &sWriteFuncHeaderData);
1115 2 : curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION, VSICurlHandleWriteFunc);
1116 2 : sWriteFuncHeaderData.bIsHTTP = strncmp(pszURL, "http", 4) == 0;
1117 2 : sWriteFuncHeaderData.bMultiRange = nMergedRanges > 1;
1118 2 : if (nMergedRanges == 1)
1119 : {
1120 0 : sWriteFuncHeaderData.nStartOffset = panOffsets[0];
1121 0 : sWriteFuncHeaderData.nEndOffset = panOffsets[0] + nTotalReqSize-1;
1122 : }
1123 :
1124 : if (ENABLE_DEBUG)
1125 : {
1126 2 : if (nMergedRanges == 1)
1127 0 : CPLDebug("VSICURL", "Downloading %s (%s)...", osRanges.c_str(), pszURL);
1128 : else
1129 : CPLDebug("VSICURL", "Downloading %s, ..., %s (" CPL_FRMT_GUIB " bytes, %s)...",
1130 2 : osFirstRange.c_str(), osLastRange.c_str(), (GUIntBig)nTotalReqSize, pszURL);
1131 : }
1132 :
1133 2 : curl_easy_setopt(hCurlHandle, CURLOPT_RANGE, osRanges.c_str());
1134 :
1135 : char szCurlErrBuf[CURL_ERROR_SIZE+1];
1136 2 : szCurlErrBuf[0] = '\0';
1137 2 : curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER, szCurlErrBuf );
1138 :
1139 2 : curl_easy_perform(hCurlHandle);
1140 :
1141 2 : curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, NULL);
1142 2 : curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION, NULL);
1143 2 : curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA, NULL);
1144 2 : curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION, NULL);
1145 :
1146 2 : if (sWriteFuncData.bInterrupted)
1147 : {
1148 0 : bInterrupted = TRUE;
1149 :
1150 0 : CPLFree(sWriteFuncData.pBuffer);
1151 0 : CPLFree(sWriteFuncHeaderData.pBuffer);
1152 :
1153 0 : return -1;
1154 : }
1155 :
1156 2 : long response_code = 0;
1157 2 : curl_easy_getinfo(hCurlHandle, CURLINFO_HTTP_CODE, &response_code);
1158 :
1159 2 : char *content_type = 0;
1160 2 : curl_easy_getinfo(hCurlHandle, CURLINFO_CONTENT_TYPE, &content_type);
1161 :
1162 2 : if ((response_code != 200 && response_code != 206 &&
1163 : response_code != 225 && response_code != 226 && response_code != 426) || sWriteFuncHeaderData.bError)
1164 : {
1165 0 : if (response_code >= 400 && szCurlErrBuf[0] != '\0')
1166 : {
1167 0 : if (strcmp(szCurlErrBuf, "Couldn't use REST") == 0)
1168 : CPLError(CE_Failure, CPLE_AppDefined, "%d: %s, %s",
1169 : (int)response_code, szCurlErrBuf,
1170 0 : "Range downloading not supported by this server !");
1171 : else
1172 0 : CPLError(CE_Failure, CPLE_AppDefined, "%d: %s", (int)response_code, szCurlErrBuf);
1173 : }
1174 : /*
1175 : if (!bHastComputedFileSize && startOffset == 0)
1176 : {
1177 : cachedFileProp->bHastComputedFileSize = bHastComputedFileSize = TRUE;
1178 : cachedFileProp->fileSize = fileSize = 0;
1179 : cachedFileProp->eExists = eExists = EXIST_NO;
1180 : }
1181 : */
1182 0 : CPLFree(sWriteFuncData.pBuffer);
1183 0 : CPLFree(sWriteFuncHeaderData.pBuffer);
1184 0 : return -1;
1185 : }
1186 :
1187 2 : char* pBuffer = sWriteFuncData.pBuffer;
1188 2 : int nSize = sWriteFuncData.nSize;
1189 :
1190 2 : int nRet = -1;
1191 : char* pszBoundary;
1192 2 : CPLString osBoundary;
1193 : char *pszNext;
1194 2 : int iRange = 0;
1195 2 : int iPart = 0;
1196 : char* pszEOL;
1197 :
1198 : /* -------------------------------------------------------------------- */
1199 : /* No multipart if a single range has been requested */
1200 : /* -------------------------------------------------------------------- */
1201 :
1202 2 : if (nMergedRanges == 1)
1203 : {
1204 0 : int nAccSize = 0;
1205 0 : if ((vsi_l_offset)nSize < nTotalReqSize)
1206 0 : goto end;
1207 :
1208 0 : for(i=0;i<nRanges;i++)
1209 : {
1210 0 : memcpy(ppData[i], pBuffer + nAccSize, panSizes[i]);
1211 0 : nAccSize += panSizes[i];
1212 : }
1213 :
1214 0 : nRet = 0;
1215 0 : goto end;
1216 : }
1217 :
1218 : /* -------------------------------------------------------------------- */
1219 : /* Extract boundary name */
1220 : /* -------------------------------------------------------------------- */
1221 :
1222 : pszBoundary = strstr(sWriteFuncHeaderData.pBuffer,
1223 2 : "Content-Type: multipart/byteranges; boundary=");
1224 2 : if( pszBoundary == NULL )
1225 : {
1226 : CPLError( CE_Failure, CPLE_AppDefined, "Could not find '%s'",
1227 0 : "Content-Type: multipart/byteranges; boundary=" );
1228 0 : goto end;
1229 : }
1230 :
1231 2 : pszBoundary += strlen( "Content-Type: multipart/byteranges; boundary=" );
1232 :
1233 2 : pszEOL = strchr(pszBoundary, '\r');
1234 2 : if (pszEOL)
1235 2 : *pszEOL = 0;
1236 2 : pszEOL = strchr(pszBoundary, '\n');
1237 2 : if (pszEOL)
1238 0 : *pszEOL = 0;
1239 :
1240 : /* Remove optional double-quote character around boundary name */
1241 2 : if (pszBoundary[0] == '"')
1242 : {
1243 0 : pszBoundary ++;
1244 0 : char* pszLastDoubleQuote = strrchr(pszBoundary, '"');
1245 0 : if (pszLastDoubleQuote)
1246 0 : *pszLastDoubleQuote = 0;
1247 : }
1248 :
1249 2 : osBoundary = "--";
1250 2 : osBoundary += pszBoundary;
1251 :
1252 : /* -------------------------------------------------------------------- */
1253 : /* Find the start of the first chunk. */
1254 : /* -------------------------------------------------------------------- */
1255 2 : pszNext = strstr(pBuffer,osBoundary.c_str());
1256 2 : if( pszNext == NULL )
1257 : {
1258 0 : CPLError( CE_Failure, CPLE_AppDefined, "No parts found." );
1259 0 : goto end;
1260 : }
1261 :
1262 2 : pszNext += strlen(osBoundary);
1263 4 : while( *pszNext != '\n' && *pszNext != '\r' && *pszNext != '\0' )
1264 0 : pszNext++;
1265 2 : if( *pszNext == '\r' )
1266 2 : pszNext++;
1267 2 : if( *pszNext == '\n' )
1268 2 : pszNext++;
1269 :
1270 : /* -------------------------------------------------------------------- */
1271 : /* Loop over parts... */
1272 : /* -------------------------------------------------------------------- */
1273 258 : while( iPart < nRanges )
1274 : {
1275 : /* -------------------------------------------------------------------- */
1276 : /* Collect headers. */
1277 : /* -------------------------------------------------------------------- */
1278 256 : int bExpectedRange = FALSE;
1279 :
1280 1024 : while( *pszNext != '\n' && *pszNext != '\r' && *pszNext != '\0' )
1281 : {
1282 512 : char *pszEOL = strstr(pszNext,"\n");
1283 :
1284 512 : if( pszEOL == NULL )
1285 : {
1286 : CPLError(CE_Failure, CPLE_AppDefined,
1287 0 : "Error while parsing multipart content (at line %d)", __LINE__);
1288 0 : goto end;
1289 : }
1290 :
1291 512 : *pszEOL = '\0';
1292 512 : int bRestoreAntislashR = FALSE;
1293 512 : if (pszEOL - pszNext > 1 && pszEOL[-1] == '\r')
1294 : {
1295 512 : bRestoreAntislashR = TRUE;
1296 512 : pszEOL[-1] = '\0';
1297 : }
1298 :
1299 512 : if (EQUALN(pszNext, "Content-Range: bytes ", strlen("Content-Range: bytes ")))
1300 : {
1301 256 : bExpectedRange = TRUE; /* FIXME */
1302 : }
1303 :
1304 512 : if (bRestoreAntislashR)
1305 512 : pszEOL[-1] = '\r';
1306 512 : *pszEOL = '\n';
1307 :
1308 512 : pszNext = pszEOL + 1;
1309 : }
1310 :
1311 256 : if (!bExpectedRange)
1312 : {
1313 : CPLError(CE_Failure, CPLE_AppDefined,
1314 0 : "Error while parsing multipart content (at line %d)", __LINE__);
1315 0 : goto end;
1316 : }
1317 :
1318 256 : if( *pszNext == '\r' )
1319 256 : pszNext++;
1320 256 : if( *pszNext == '\n' )
1321 256 : pszNext++;
1322 :
1323 : /* -------------------------------------------------------------------- */
1324 : /* Work out the data block size. */
1325 : /* -------------------------------------------------------------------- */
1326 256 : size_t nBytesAvail = nSize - (pszNext - pBuffer);
1327 :
1328 0 : while(TRUE)
1329 : {
1330 256 : if (nBytesAvail < panSizes[iRange])
1331 : {
1332 : CPLError(CE_Failure, CPLE_AppDefined,
1333 0 : "Error while parsing multipart content (at line %d)", __LINE__);
1334 0 : goto end;
1335 : }
1336 :
1337 256 : memcpy(ppData[iRange], pszNext, panSizes[iRange]);
1338 256 : pszNext += panSizes[iRange];
1339 256 : nBytesAvail -= panSizes[iRange];
1340 1018 : if( iRange + 1 < nRanges &&
1341 762 : panOffsets[iRange] + panSizes[iRange] == panOffsets[iRange + 1] )
1342 : {
1343 0 : iRange++;
1344 : }
1345 : else
1346 : break;
1347 : }
1348 :
1349 256 : iPart ++;
1350 256 : iRange ++;
1351 :
1352 1024 : while( nBytesAvail > 0
1353 : && (*pszNext != '-'
1354 : || strncmp(pszNext,osBoundary,strlen(osBoundary)) != 0) )
1355 : {
1356 512 : pszNext++;
1357 512 : nBytesAvail--;
1358 : }
1359 :
1360 256 : if( nBytesAvail == 0 )
1361 : {
1362 : CPLError(CE_Failure, CPLE_AppDefined,
1363 0 : "Error while parsing multipart content (at line %d)", __LINE__);
1364 0 : goto end;
1365 : }
1366 :
1367 256 : pszNext += strlen(osBoundary);
1368 256 : if( strncmp(pszNext,"--",2) == 0 )
1369 : {
1370 : /* End of multipart */
1371 2 : break;
1372 : }
1373 :
1374 254 : if( *pszNext == '\r' )
1375 254 : pszNext++;
1376 254 : if( *pszNext == '\n' )
1377 254 : pszNext++;
1378 : else
1379 : {
1380 : CPLError(CE_Failure, CPLE_AppDefined,
1381 0 : "Error while parsing multipart content (at line %d)", __LINE__);
1382 0 : goto end;
1383 : }
1384 : }
1385 :
1386 2 : if (iPart == nMergedRanges)
1387 2 : nRet = 0;
1388 : else
1389 : CPLError(CE_Failure, CPLE_AppDefined,
1390 0 : "Got only %d parts, where %d were expected", iPart, nMergedRanges);
1391 :
1392 : end:
1393 2 : CPLFree(sWriteFuncData.pBuffer);
1394 2 : CPLFree(sWriteFuncHeaderData.pBuffer);
1395 :
1396 2 : return nRet;
1397 : }
1398 :
1399 : /************************************************************************/
1400 : /* Write() */
1401 : /************************************************************************/
1402 :
1403 0 : size_t VSICurlHandle::Write( const void *pBuffer, size_t nSize, size_t nMemb )
1404 : {
1405 0 : return 0;
1406 : }
1407 :
1408 : /************************************************************************/
1409 : /* Eof() */
1410 : /************************************************************************/
1411 :
1412 :
1413 10 : int VSICurlHandle::Eof()
1414 : {
1415 10 : return bEOF;
1416 : }
1417 :
1418 : /************************************************************************/
1419 : /* Flush() */
1420 : /************************************************************************/
1421 :
1422 0 : int VSICurlHandle::Flush()
1423 : {
1424 0 : return 0;
1425 : }
1426 :
1427 : /************************************************************************/
1428 : /* Close() */
1429 : /************************************************************************/
1430 :
1431 126 : int VSICurlHandle::Close()
1432 : {
1433 126 : return 0;
1434 : }
1435 :
1436 :
1437 :
1438 :
1439 : /************************************************************************/
1440 : /* VSICurlFilesystemHandler() */
1441 : /************************************************************************/
1442 :
1443 1341 : VSICurlFilesystemHandler::VSICurlFilesystemHandler()
1444 : {
1445 1341 : hMutex = NULL;
1446 1341 : papsRegions = NULL;
1447 1341 : nRegions = 0;
1448 1341 : bUseCacheDisk = CSLTestBoolean(CPLGetConfigOption("CPL_VSIL_CURL_USE_CACHE", "NO"));
1449 1341 : }
1450 :
1451 : /************************************************************************/
1452 : /* ~VSICurlFilesystemHandler() */
1453 : /************************************************************************/
1454 :
1455 1297 : VSICurlFilesystemHandler::~VSICurlFilesystemHandler()
1456 : {
1457 : int i;
1458 1381 : for(i=0;i<nRegions;i++)
1459 : {
1460 84 : CPLFree(papsRegions[i]->pData);
1461 84 : CPLFree(papsRegions[i]);
1462 : }
1463 1297 : CPLFree(papsRegions);
1464 :
1465 1297 : std::map<CPLString, CachedFileProp*>::const_iterator iterCacheFileSize;
1466 :
1467 2387 : for( iterCacheFileSize = cacheFileSize.begin(); iterCacheFileSize != cacheFileSize.end(); iterCacheFileSize++ )
1468 : {
1469 1090 : CPLFree(iterCacheFileSize->second);
1470 : }
1471 :
1472 1297 : std::map<CPLString, CachedDirList*>::const_iterator iterCacheDirList;
1473 :
1474 1331 : for( iterCacheDirList = cacheDirList.begin(); iterCacheDirList != cacheDirList.end(); iterCacheDirList++ )
1475 : {
1476 34 : CSLDestroy(iterCacheDirList->second->papszFileList);
1477 34 : CPLFree(iterCacheDirList->second);
1478 : }
1479 :
1480 1297 : std::map<GIntBig, CachedConnection*>::const_iterator iterConnections;
1481 1299 : for( iterConnections = mapConnections.begin(); iterConnections != mapConnections.end(); iterConnections++ )
1482 : {
1483 2 : curl_easy_cleanup(iterConnections->second->hCurlHandle);
1484 2 : delete iterConnections->second;
1485 : }
1486 :
1487 1297 : if( hMutex != NULL )
1488 2 : CPLDestroyMutex( hMutex );
1489 1297 : hMutex = NULL;
1490 1297 : }
1491 :
1492 : /************************************************************************/
1493 : /* GetCurlHandleFor() */
1494 : /************************************************************************/
1495 :
1496 132 : CURL* VSICurlFilesystemHandler::GetCurlHandleFor(CPLString osURL)
1497 : {
1498 132 : CPLMutexHolder oHolder( &hMutex );
1499 :
1500 132 : std::map<GIntBig, CachedConnection*>::const_iterator iterConnections;
1501 :
1502 132 : iterConnections = mapConnections.find(CPLGetPID());
1503 132 : if (iterConnections == mapConnections.end())
1504 : {
1505 2 : CURL* hCurlHandle = curl_easy_init();
1506 2 : CachedConnection* psCachedConnection = new CachedConnection;
1507 2 : psCachedConnection->osURL = osURL;
1508 2 : psCachedConnection->hCurlHandle = hCurlHandle;
1509 2 : mapConnections[CPLGetPID()] = psCachedConnection;
1510 2 : return hCurlHandle;
1511 : }
1512 : else
1513 : {
1514 130 : CachedConnection* psCachedConnection = iterConnections->second;
1515 130 : if (osURL == psCachedConnection->osURL)
1516 66 : return psCachedConnection->hCurlHandle;
1517 :
1518 64 : const char* pszURL = osURL.c_str();
1519 64 : const char* pszEndOfServ = strchr(pszURL, '.');
1520 64 : if (pszEndOfServ != NULL)
1521 64 : pszEndOfServ = strchr(pszEndOfServ, '/');
1522 64 : if (pszEndOfServ == NULL)
1523 0 : pszURL = pszURL + strlen(pszURL);
1524 : int bReinitConnection = strncmp(psCachedConnection->osURL,
1525 64 : pszURL, pszEndOfServ-pszURL) != 0;
1526 :
1527 64 : if (bReinitConnection)
1528 : {
1529 14 : if (psCachedConnection->hCurlHandle)
1530 14 : curl_easy_cleanup(psCachedConnection->hCurlHandle);
1531 14 : psCachedConnection->hCurlHandle = curl_easy_init();
1532 : }
1533 64 : psCachedConnection->osURL = osURL;
1534 :
1535 64 : return psCachedConnection->hCurlHandle;
1536 0 : }
1537 : }
1538 :
1539 :
1540 : /************************************************************************/
1541 : /* GetRegionFromCacheDisk() */
1542 : /************************************************************************/
1543 :
1544 : const CachedRegion*
1545 0 : VSICurlFilesystemHandler::GetRegionFromCacheDisk(const char* pszURL,
1546 : vsi_l_offset nFileOffsetStart)
1547 : {
1548 0 : nFileOffsetStart = (nFileOffsetStart / DOWNLOAD_CHUNCK_SIZE) * DOWNLOAD_CHUNCK_SIZE;
1549 0 : VSILFILE* fp = VSIFOpenL(VSICurlGetCacheFileName(), "rb");
1550 0 : if (fp)
1551 : {
1552 0 : unsigned long pszURLHash = CPLHashSetHashStr(pszURL);
1553 : unsigned long pszURLHashCached;
1554 : vsi_l_offset nFileOffsetStartCached;
1555 : size_t nSizeCached;
1556 0 : while(TRUE)
1557 : {
1558 0 : if (VSIFReadL(&pszURLHashCached, 1, sizeof(unsigned long), fp) == 0)
1559 : break;
1560 0 : VSIFReadL(&nFileOffsetStartCached, 1, sizeof(vsi_l_offset), fp);
1561 0 : VSIFReadL(&nSizeCached, 1, sizeof(size_t), fp);
1562 0 : if (pszURLHash == pszURLHashCached &&
1563 : nFileOffsetStart == nFileOffsetStartCached)
1564 : {
1565 : if (ENABLE_DEBUG)
1566 0 : CPLDebug("VSICURL", "Got data at offset " CPL_FRMT_GUIB " from disk" , nFileOffsetStart);
1567 0 : if (nSizeCached)
1568 : {
1569 0 : char* pBuffer = (char*) CPLMalloc(nSizeCached);
1570 0 : VSIFReadL(pBuffer, 1, nSizeCached, fp);
1571 0 : AddRegion(pszURL, nFileOffsetStart, nSizeCached, pBuffer);
1572 0 : CPLFree(pBuffer);
1573 : }
1574 : else
1575 : {
1576 0 : AddRegion(pszURL, nFileOffsetStart, 0, NULL);
1577 : }
1578 0 : VSIFCloseL(fp);
1579 0 : return GetRegion(pszURL, nFileOffsetStart);
1580 : }
1581 : else
1582 : {
1583 0 : VSIFSeekL(fp, nSizeCached, SEEK_CUR);
1584 : }
1585 : }
1586 0 : VSIFCloseL(fp);
1587 : }
1588 0 : return NULL;
1589 : }
1590 :
1591 :
1592 : /************************************************************************/
1593 : /* AddRegionToCacheDisk() */
1594 : /************************************************************************/
1595 :
1596 0 : void VSICurlFilesystemHandler::AddRegionToCacheDisk(CachedRegion* psRegion)
1597 : {
1598 0 : VSILFILE* fp = VSIFOpenL(VSICurlGetCacheFileName(), "r+b");
1599 0 : if (fp)
1600 : {
1601 : unsigned long pszURLHashCached;
1602 : vsi_l_offset nFileOffsetStartCached;
1603 : size_t nSizeCached;
1604 0 : while(TRUE)
1605 : {
1606 0 : if (VSIFReadL(&pszURLHashCached, 1, sizeof(unsigned long), fp) == 0)
1607 : break;
1608 0 : VSIFReadL(&nFileOffsetStartCached, 1, sizeof(vsi_l_offset), fp);
1609 0 : VSIFReadL(&nSizeCached, 1, sizeof(size_t), fp);
1610 0 : if (psRegion->pszURLHash == pszURLHashCached &&
1611 : psRegion->nFileOffsetStart == nFileOffsetStartCached)
1612 : {
1613 0 : CPLAssert(psRegion->nSize == nSizeCached);
1614 0 : VSIFCloseL(fp);
1615 0 : return;
1616 : }
1617 : else
1618 : {
1619 0 : VSIFSeekL(fp, nSizeCached, SEEK_CUR);
1620 : }
1621 : }
1622 : }
1623 : else
1624 : {
1625 0 : fp = VSIFOpenL(VSICurlGetCacheFileName(), "wb");
1626 : }
1627 0 : if (fp)
1628 : {
1629 : if (ENABLE_DEBUG)
1630 0 : CPLDebug("VSICURL", "Write data at offset " CPL_FRMT_GUIB " to disk" , psRegion->nFileOffsetStart);
1631 0 : VSIFWriteL(&psRegion->pszURLHash, 1, sizeof(unsigned long), fp);
1632 0 : VSIFWriteL(&psRegion->nFileOffsetStart, 1, sizeof(vsi_l_offset), fp);
1633 0 : VSIFWriteL(&psRegion->nSize, 1, sizeof(size_t), fp);
1634 0 : if (psRegion->nSize)
1635 0 : VSIFWriteL(psRegion->pData, 1, psRegion->nSize, fp);
1636 :
1637 0 : VSIFCloseL(fp);
1638 : }
1639 0 : return;
1640 : }
1641 :
1642 :
1643 : /************************************************************************/
1644 : /* GetRegion() */
1645 : /************************************************************************/
1646 :
1647 12284 : const CachedRegion* VSICurlFilesystemHandler::GetRegion(const char* pszURL,
1648 : vsi_l_offset nFileOffsetStart)
1649 : {
1650 12284 : CPLMutexHolder oHolder( &hMutex );
1651 :
1652 12284 : unsigned long pszURLHash = CPLHashSetHashStr(pszURL);
1653 :
1654 12284 : nFileOffsetStart = (nFileOffsetStart / DOWNLOAD_CHUNCK_SIZE) * DOWNLOAD_CHUNCK_SIZE;
1655 : int i;
1656 14210 : for(i=0;i<nRegions;i++)
1657 : {
1658 14124 : CachedRegion* psRegion = papsRegions[i];
1659 14124 : if (psRegion->pszURLHash == pszURLHash &&
1660 : nFileOffsetStart == psRegion->nFileOffsetStart)
1661 : {
1662 12198 : memmove(papsRegions + 1, papsRegions, i * sizeof(CachedRegion*));
1663 12198 : papsRegions[0] = psRegion;
1664 12198 : return psRegion;
1665 : }
1666 : }
1667 86 : if (bUseCacheDisk)
1668 0 : return GetRegionFromCacheDisk(pszURL, nFileOffsetStart);
1669 86 : return NULL;
1670 : }
1671 :
1672 : /************************************************************************/
1673 : /* AddRegion() */
1674 : /************************************************************************/
1675 :
1676 84 : void VSICurlFilesystemHandler::AddRegion(const char* pszURL,
1677 : vsi_l_offset nFileOffsetStart,
1678 : size_t nSize,
1679 : const char *pData)
1680 : {
1681 84 : CPLMutexHolder oHolder( &hMutex );
1682 :
1683 84 : unsigned long pszURLHash = CPLHashSetHashStr(pszURL);
1684 :
1685 : CachedRegion* psRegion;
1686 84 : if (nRegions == N_MAX_REGIONS)
1687 : {
1688 0 : psRegion = papsRegions[N_MAX_REGIONS-1];
1689 0 : memmove(papsRegions + 1, papsRegions, (N_MAX_REGIONS-1) * sizeof(CachedRegion*));
1690 0 : papsRegions[0] = psRegion;
1691 0 : CPLFree(psRegion->pData);
1692 : }
1693 : else
1694 : {
1695 84 : papsRegions = (CachedRegion**) CPLRealloc(papsRegions, (nRegions + 1) * sizeof(CachedRegion*));
1696 84 : if (nRegions)
1697 82 : memmove(papsRegions + 1, papsRegions, nRegions * sizeof(CachedRegion*));
1698 84 : nRegions ++;
1699 84 : papsRegions[0] = psRegion = (CachedRegion*) CPLMalloc(sizeof(CachedRegion));
1700 : }
1701 :
1702 84 : psRegion->pszURLHash = pszURLHash;
1703 84 : psRegion->nFileOffsetStart = nFileOffsetStart;
1704 84 : psRegion->nSize = nSize;
1705 84 : psRegion->pData = (nSize) ? (char*) CPLMalloc(nSize) : NULL;
1706 84 : if (nSize)
1707 84 : memcpy(psRegion->pData, pData, nSize);
1708 :
1709 84 : if (bUseCacheDisk)
1710 0 : AddRegionToCacheDisk(psRegion);
1711 84 : }
1712 :
1713 : /************************************************************************/
1714 : /* GetCachedFileProp() */
1715 : /************************************************************************/
1716 :
1717 5624 : CachedFileProp* VSICurlFilesystemHandler::GetCachedFileProp(const char* pszURL)
1718 : {
1719 5624 : CPLMutexHolder oHolder( &hMutex );
1720 :
1721 5624 : CachedFileProp* cachedFileProp = cacheFileSize[pszURL];
1722 5624 : if (cachedFileProp == NULL)
1723 : {
1724 1090 : cachedFileProp = (CachedFileProp*) CPLMalloc(sizeof(CachedFileProp));
1725 1090 : cachedFileProp->eExists = EXIST_UNKNOWN;
1726 1090 : cachedFileProp->bHastComputedFileSize = FALSE;
1727 1090 : cachedFileProp->fileSize = 0;
1728 1090 : cachedFileProp->bIsDirectory = FALSE;
1729 1090 : cacheFileSize[pszURL] = cachedFileProp;
1730 : }
1731 :
1732 5624 : return cachedFileProp;
1733 : }
1734 :
1735 : /************************************************************************/
1736 : /* Open() */
1737 : /************************************************************************/
1738 :
1739 152 : VSIVirtualHandle* VSICurlFilesystemHandler::Open( const char *pszFilename,
1740 : const char *pszAccess)
1741 : {
1742 152 : if (strchr(pszAccess, 'w') != NULL ||
1743 : strchr(pszAccess, '+') != NULL)
1744 : {
1745 : CPLError(CE_Failure, CPLE_AppDefined,
1746 0 : "Only read-only mode is supported for /vsicurl");
1747 0 : return NULL;
1748 : }
1749 :
1750 : const char* pszOptionVal =
1751 152 : CPLGetConfigOption( "GDAL_DISABLE_READDIR_ON_OPEN", "NO" );
1752 : int bSkipReadDir = EQUAL(pszOptionVal, "EMPTY_DIR") ||
1753 152 : CSLTestBoolean(pszOptionVal);
1754 :
1755 152 : CPLString osFilename(pszFilename);
1756 152 : int bGotFileList = TRUE;
1757 152 : if (strchr(CPLGetFilename(osFilename), '.') != NULL &&
1758 : strncmp(CPLGetExtension(osFilename), "zip", 3) != 0 && !bSkipReadDir)
1759 : {
1760 94 : char** papszFileList = ReadDir(CPLGetDirname(osFilename), &bGotFileList);
1761 94 : int bFound = (VSICurlIsFileInList(papszFileList, CPLGetFilename(osFilename)) != -1);
1762 94 : CSLDestroy(papszFileList);
1763 94 : if (bGotFileList && !bFound)
1764 : {
1765 22 : return NULL;
1766 : }
1767 : }
1768 :
1769 130 : VSICurlHandle* poHandle = new VSICurlHandle( this, osFilename + strlen("/vsicurl/"));
1770 130 : if (!bGotFileList)
1771 : {
1772 : /* If we didn't get a filelist, check that the file really exists */
1773 34 : if (!poHandle->Exists())
1774 : {
1775 4 : delete poHandle;
1776 4 : poHandle = NULL;
1777 : }
1778 : }
1779 130 : return poHandle;
1780 : }
1781 :
1782 : /************************************************************************/
1783 : /* VSICurlParserFindEOL() */
1784 : /* */
1785 : /* Small helper function for VSICurlParseHTMLFileList() to find */
1786 : /* the end of a line in the directory listing. Either a <br> */
1787 : /* or newline. */
1788 : /************************************************************************/
1789 :
1790 1330 : static char *VSICurlParserFindEOL( char *pszData )
1791 :
1792 : {
1793 72670 : while( *pszData != '\0' && *pszData != '\n' && !EQUALN(pszData,"<br>",4) )
1794 70010 : pszData++;
1795 :
1796 1330 : if( *pszData == '\0' )
1797 30 : return NULL;
1798 : else
1799 1300 : return pszData;
1800 : }
1801 :
1802 :
1803 : /************************************************************************/
1804 : /* VSICurlParseHTMLDateTimeFileSize() */
1805 : /************************************************************************/
1806 :
1807 : static const char* const apszMonths[] = { "January", "February", "March",
1808 : "April", "May", "June", "July",
1809 : "August", "September", "October",
1810 : "November", "December" };
1811 :
1812 1092 : static int VSICurlParseHTMLDateTimeFileSize(const char* pszStr,
1813 : struct tm& brokendowntime,
1814 : GUIntBig& nFileSize,
1815 : GIntBig& mTime)
1816 : {
1817 : int iMonth;
1818 14092 : for(iMonth=0;iMonth<12;iMonth++)
1819 : {
1820 : char szMonth[32];
1821 13012 : szMonth[0] = '-';
1822 13012 : memcpy(szMonth + 1, apszMonths[iMonth], 3);
1823 13012 : szMonth[4] = '-';
1824 13012 : szMonth[5] = '\0';
1825 13012 : const char* pszMonthFound = strstr(pszStr, szMonth);
1826 13012 : if (pszMonthFound)
1827 : {
1828 : /* Format of Apache, like in http://download.osgeo.org/gdal/data/gtiff/ */
1829 : /* "17-May-2010 12:26" */
1830 36 : if (pszMonthFound - pszStr > 2 && strlen(pszMonthFound) > 15 &&
1831 24 : pszMonthFound[-2 + 11] == ' ' && pszMonthFound[-2 + 14] == ':')
1832 : {
1833 12 : pszMonthFound -= 2;
1834 12 : int nDay = atoi(pszMonthFound);
1835 12 : int nYear = atoi(pszMonthFound + 7);
1836 12 : int nHour = atoi(pszMonthFound + 12);
1837 12 : int nMin = atoi(pszMonthFound + 15);
1838 12 : if (nDay >= 1 && nDay <= 31 && nYear >= 1900 &&
1839 : nHour >= 0 && nHour <= 24 && nMin >= 0 && nMin < 60)
1840 : {
1841 12 : brokendowntime.tm_year = nYear - 1900;
1842 12 : brokendowntime.tm_mon = iMonth;
1843 12 : brokendowntime.tm_mday = nDay;
1844 12 : brokendowntime.tm_hour = nHour;
1845 12 : brokendowntime.tm_min = nMin;
1846 12 : mTime = CPLYMDHMSToUnixTime(&brokendowntime);
1847 :
1848 12 : return TRUE;
1849 : }
1850 : }
1851 0 : return FALSE;
1852 : }
1853 :
1854 : /* Microsoft IIS */
1855 13000 : szMonth[0] = ' ';
1856 13000 : strcpy(szMonth + 1, apszMonths[iMonth]);
1857 13000 : strcat(szMonth, " ");
1858 13000 : pszMonthFound = strstr(pszStr, szMonth);
1859 13000 : if (pszMonthFound)
1860 : {
1861 0 : int nLenMonth = strlen(apszMonths[iMonth]);
1862 0 : if (pszMonthFound - pszStr > 2 &&
1863 0 : pszMonthFound[-1] != ',' &&
1864 0 : pszMonthFound[-2] != ' ' &&
1865 : (int)strlen(pszMonthFound-2) > 2 + 1 + nLenMonth + 1 + 4 + 1 + 5 + 1 + 4)
1866 : {
1867 : /* Format of http://ortho.linz.govt.nz/tifs/1994_95/ */
1868 : /* " Friday, 21 April 2006 12:05 p.m. 48062343 m35a_fy_94_95.tif" */
1869 0 : pszMonthFound -= 2;
1870 0 : int nDay = atoi(pszMonthFound);
1871 0 : int nCurOffset = 2 + 1 + nLenMonth + 1;
1872 0 : int nYear = atoi(pszMonthFound + nCurOffset);
1873 0 : nCurOffset += 4 + 1;
1874 0 : int nHour = atoi(pszMonthFound + nCurOffset);
1875 0 : if (nHour < 10)
1876 0 : nCurOffset += 1 + 1;
1877 : else
1878 0 : nCurOffset += 2 + 1;
1879 0 : int nMin = atoi(pszMonthFound + nCurOffset);
1880 0 : nCurOffset += 2 + 1;
1881 0 : if (strncmp(pszMonthFound + nCurOffset, "p.m.", 4) == 0)
1882 0 : nHour += 12;
1883 0 : else if (strncmp(pszMonthFound + nCurOffset, "a.m.", 4) != 0)
1884 0 : nHour = -1;
1885 0 : nCurOffset += 4;
1886 :
1887 0 : const char* pszFilesize = pszMonthFound + nCurOffset;
1888 0 : while(*pszFilesize == ' ')
1889 0 : pszFilesize ++;
1890 0 : if (*pszFilesize >= '1' && *pszFilesize <= '9')
1891 0 : nFileSize = CPLScanUIntBig(pszFilesize, strlen(pszFilesize));
1892 :
1893 0 : if (nDay >= 1 && nDay <= 31 && nYear >= 1900 &&
1894 : nHour >= 0 && nHour <= 24 && nMin >= 0 && nMin < 60)
1895 : {
1896 0 : brokendowntime.tm_year = nYear - 1900;
1897 0 : brokendowntime.tm_mon = iMonth;
1898 0 : brokendowntime.tm_mday = nDay;
1899 0 : brokendowntime.tm_hour = nHour;
1900 0 : brokendowntime.tm_min = nMin;
1901 0 : mTime = CPLYMDHMSToUnixTime(&brokendowntime);
1902 :
1903 0 : return TRUE;
1904 : }
1905 0 : nFileSize = 0;
1906 : }
1907 0 : else if (pszMonthFound - pszStr > 1 &&
1908 0 : pszMonthFound[-1] == ',' &&
1909 : (int)strlen(pszMonthFound) > 1 + nLenMonth + 1 + 2 + 1 + 1 + 4 + 1 + 5 + 1 + 2)
1910 : {
1911 : /* Format of http://publicfiles.dep.state.fl.us/dear/BWR_GIS/2007NWFLULC/ */
1912 : /* " Sunday, June 20, 2010 6:46 PM 233170905 NWF2007LULCForSDE.zip" */
1913 0 : pszMonthFound += 1;
1914 0 : int nCurOffset = nLenMonth + 1;
1915 0 : int nDay = atoi(pszMonthFound + nCurOffset);
1916 0 : nCurOffset += 2 + 1 + 1;
1917 0 : int nYear = atoi(pszMonthFound + nCurOffset);
1918 0 : nCurOffset += 4 + 1;
1919 0 : int nHour = atoi(pszMonthFound + nCurOffset);
1920 0 : nCurOffset += 2 + 1;
1921 0 : int nMin = atoi(pszMonthFound + nCurOffset);
1922 0 : nCurOffset += 2 + 1;
1923 0 : if (strncmp(pszMonthFound + nCurOffset, "PM", 2) == 0)
1924 0 : nHour += 12;
1925 0 : else if (strncmp(pszMonthFound + nCurOffset, "AM", 2) != 0)
1926 0 : nHour = -1;
1927 0 : nCurOffset += 2;
1928 :
1929 0 : const char* pszFilesize = pszMonthFound + nCurOffset;
1930 0 : while(*pszFilesize == ' ')
1931 0 : pszFilesize ++;
1932 0 : if (*pszFilesize >= '1' && *pszFilesize <= '9')
1933 0 : nFileSize = CPLScanUIntBig(pszFilesize, strlen(pszFilesize));
1934 :
1935 0 : if (nDay >= 1 && nDay <= 31 && nYear >= 1900 &&
1936 : nHour >= 0 && nHour <= 24 && nMin >= 0 && nMin < 60)
1937 : {
1938 0 : brokendowntime.tm_year = nYear - 1900;
1939 0 : brokendowntime.tm_mon = iMonth;
1940 0 : brokendowntime.tm_mday = nDay;
1941 0 : brokendowntime.tm_hour = nHour;
1942 0 : brokendowntime.tm_min = nMin;
1943 0 : mTime = CPLYMDHMSToUnixTime(&brokendowntime);
1944 :
1945 0 : return TRUE;
1946 : }
1947 0 : nFileSize = 0;
1948 : }
1949 0 : return FALSE;
1950 : }
1951 : }
1952 :
1953 1080 : return FALSE;
1954 : }
1955 :
1956 : /************************************************************************/
1957 : /* ParseHTMLFileList() */
1958 : /* */
1959 : /* Parse a file list document and return all the components. */
1960 : /************************************************************************/
1961 :
1962 30 : char** VSICurlFilesystemHandler::ParseHTMLFileList(const char* pszFilename,
1963 : char* pszData,
1964 : int* pbGotFileList)
1965 : {
1966 30 : CPLStringList oFileList;
1967 30 : char* pszLine = pszData;
1968 : char* c;
1969 30 : int nCount = 0;
1970 30 : int bIsHTMLDirList = FALSE;
1971 30 : CPLString osExpectedString;
1972 30 : CPLString osExpectedString2;
1973 30 : CPLString osExpectedString3;
1974 30 : CPLString osExpectedString4;
1975 30 : CPLString osExpectedString_unescaped;
1976 :
1977 30 : *pbGotFileList = FALSE;
1978 :
1979 : const char* pszDir;
1980 30 : if (EQUALN(pszFilename, "/vsicurl/http://", strlen("/vsicurl/http://")))
1981 30 : pszDir = strchr(pszFilename + strlen("/vsicurl/http://"), '/');
1982 0 : else if (EQUALN(pszFilename, "/vsicurl/https://", strlen("/vsicurl/https://")))
1983 0 : pszDir = strchr(pszFilename + strlen("/vsicurl/https://"), '/');
1984 : else
1985 0 : pszDir = strchr(pszFilename + strlen("/vsicurl/ftp://"), '/');
1986 30 : if (pszDir == NULL)
1987 0 : pszDir = "";
1988 : /* Apache */
1989 30 : osExpectedString = "<title>Index of ";
1990 30 : osExpectedString += pszDir;
1991 30 : osExpectedString += "</title>";
1992 : /* shttpd */
1993 30 : osExpectedString2 = "<title>Index of ";
1994 30 : osExpectedString2 += pszDir;
1995 30 : osExpectedString2 += "/</title>";
1996 : /* FTP */
1997 30 : osExpectedString3 = "FTP Listing of ";
1998 30 : osExpectedString3 += pszDir;
1999 30 : osExpectedString3 += "/";
2000 : /* Apache 1.3.33 */
2001 30 : osExpectedString4 = "<TITLE>Index of ";
2002 30 : osExpectedString4 += pszDir;
2003 30 : osExpectedString4 += "</TITLE>";
2004 :
2005 : /* The listing of http://dds.cr.usgs.gov/srtm/SRTM_image_sample/picture%20examples/ */
2006 : /* has "<title>Index of /srtm/SRTM_image_sample/picture examples</title>" so we must */
2007 : /* try unescaped %20 also */
2008 : /* Similar with http://datalib.usask.ca/gis/Data/Central_America_goodbutdoweown%3f/ */
2009 30 : if (strchr(pszDir, '%'))
2010 : {
2011 0 : char* pszUnescapedDir = CPLUnescapeString(pszDir, NULL, CPLES_URL);
2012 0 : osExpectedString_unescaped = "<title>Index of ";
2013 0 : osExpectedString_unescaped += pszUnescapedDir;
2014 0 : osExpectedString_unescaped += "</title>";
2015 0 : CPLFree(pszUnescapedDir);
2016 : }
2017 :
2018 30 : int nCountTable = 0;
2019 :
2020 1360 : while( (c = VSICurlParserFindEOL( pszLine )) != NULL )
2021 : {
2022 1300 : *c = 0;
2023 :
2024 : /* To avoid false positive on pages such as http://www.ngs.noaa.gov/PC_PROD/USGG2009BETA */
2025 : /* This is a heuristics, but normal HTML listing of files have not more than one table */
2026 1300 : if (strstr(pszLine, "<table"))
2027 : {
2028 4 : nCountTable ++;
2029 4 : if (nCountTable == 2)
2030 : {
2031 0 : *pbGotFileList = FALSE;
2032 0 : return NULL;
2033 : }
2034 : }
2035 :
2036 1300 : if (!bIsHTMLDirList &&
2037 : (strstr(pszLine, osExpectedString.c_str()) ||
2038 : strstr(pszLine, osExpectedString2.c_str()) ||
2039 : strstr(pszLine, osExpectedString3.c_str()) ||
2040 : strstr(pszLine, osExpectedString4.c_str()) ||
2041 : (osExpectedString_unescaped.size() != 0 && strstr(pszLine, osExpectedString_unescaped.c_str()))))
2042 : {
2043 4 : bIsHTMLDirList = TRUE;
2044 4 : *pbGotFileList = TRUE;
2045 : }
2046 : /* Subversion HTTP listing */
2047 : /* or Microsoft-IIS/6.0 listing (e.g. http://ortho.linz.govt.nz/tifs/2005_06/) */
2048 1322 : else if (!bIsHTMLDirList && strstr(pszLine, "<title>"))
2049 : {
2050 : /* Detect something like : <html><head><title>gdal - Revision 20739: /trunk/autotest/gcore/data</title></head> */
2051 : /* The annoying thing is that what is after ': ' is a subpart of what is after http://server/ */
2052 26 : char* pszSubDir = strstr(pszLine, ": ");
2053 26 : if (pszSubDir == NULL)
2054 : /* or <title>ortho.linz.govt.nz - /tifs/2005_06/</title> */
2055 0 : pszSubDir = strstr(pszLine, "- ");
2056 26 : if (pszSubDir)
2057 : {
2058 26 : pszSubDir += 2;
2059 26 : char* pszTmp = strstr(pszSubDir, "</title>");
2060 26 : if (pszTmp)
2061 : {
2062 26 : if (pszTmp[-1] == '/')
2063 0 : pszTmp[-1] = 0;
2064 : else
2065 26 : *pszTmp = 0;
2066 26 : if (strstr(pszDir, pszSubDir))
2067 : {
2068 26 : bIsHTMLDirList = TRUE;
2069 26 : *pbGotFileList = TRUE;
2070 : }
2071 : }
2072 : }
2073 : }
2074 1270 : else if (bIsHTMLDirList &&
2075 : (strstr(pszLine, "<a href=\"") != NULL || strstr(pszLine, "<A HREF=\"") != NULL) &&
2076 : strstr(pszLine, "<a href=\"http://") == NULL && /* exclude absolute links, like to subversion home */
2077 : strstr(pszLine, "Parent Directory") == NULL /* exclude parent directory */)
2078 : {
2079 1096 : char *beginFilename = strstr(pszLine, "<a href=\"");
2080 1096 : if (beginFilename == NULL)
2081 0 : beginFilename = strstr(pszLine, "<A HREF=\"");
2082 1096 : beginFilename += strlen("<a href=\"");
2083 1096 : char *endQuote = strchr(beginFilename, '"');
2084 1096 : if (endQuote && strncmp(beginFilename, "?C=", 3) != 0 && strncmp(beginFilename, "?N=", 3) != 0)
2085 : {
2086 : struct tm brokendowntime;
2087 1092 : memset(&brokendowntime, 0, sizeof(brokendowntime));
2088 1092 : GUIntBig nFileSize = 0;
2089 1092 : GIntBig mTime = 0;
2090 :
2091 : VSICurlParseHTMLDateTimeFileSize(pszLine,
2092 : brokendowntime,
2093 : nFileSize,
2094 1092 : mTime);
2095 :
2096 1092 : *endQuote = '\0';
2097 :
2098 : /* Remove trailing slash, that are returned for directories by */
2099 : /* Apache */
2100 1092 : int bIsDirectory = FALSE;
2101 1092 : if (endQuote[-1] == '/')
2102 : {
2103 48 : bIsDirectory = TRUE;
2104 48 : endQuote[-1] = 0;
2105 : }
2106 :
2107 : /* shttpd links include slashes from the root directory. Skip them */
2108 2184 : while(strchr(beginFilename, '/'))
2109 0 : beginFilename = strchr(beginFilename, '/') + 1;
2110 :
2111 1092 : if (strcmp(beginFilename, ".") != 0 &&
2112 : strcmp(beginFilename, "..") != 0)
2113 : {
2114 : CPLString osCachedFilename =
2115 1066 : CPLSPrintf("%s/%s", pszFilename + strlen("/vsicurl/"), beginFilename);
2116 1066 : CachedFileProp* cachedFileProp = GetCachedFileProp(osCachedFilename);
2117 1066 : cachedFileProp->eExists = EXIST_YES;
2118 1066 : cachedFileProp->bIsDirectory = bIsDirectory;
2119 1066 : cachedFileProp->mTime = mTime;
2120 1066 : cachedFileProp->bHastComputedFileSize = nFileSize > 0;
2121 1066 : cachedFileProp->fileSize = nFileSize;
2122 :
2123 1066 : oFileList.AddString( beginFilename );
2124 : if (ENABLE_DEBUG)
2125 : CPLDebug("VSICURL", "File[%d] = %s, is_dir = %d, size = " CPL_FRMT_GUIB ", time = %04d/%02d/%02d %02d:%02d:%02d",
2126 : nCount, beginFilename, bIsDirectory, nFileSize,
2127 : brokendowntime.tm_year + 1900, brokendowntime.tm_mon + 1, brokendowntime.tm_mday,
2128 1066 : brokendowntime.tm_hour, brokendowntime.tm_min, brokendowntime.tm_sec);
2129 1066 : nCount ++;
2130 : }
2131 : }
2132 : }
2133 1300 : pszLine = c + 1;
2134 : }
2135 :
2136 30 : return oFileList.StealList();
2137 : }
2138 :
2139 :
2140 : /************************************************************************/
2141 : /* VSICurlGetToken() */
2142 : /************************************************************************/
2143 :
/* Extract the next space-delimited token from pszCurPtr, in place.        */
/*                                                                          */
/* Leading spaces are skipped. The token is nul-terminated by overwriting  */
/* the first space that follows it, and *ppszNextToken is set to the first */
/* non-space character after it (possibly the end of the string).          */
/* When the token runs up to the end of the string, *ppszNextToken is set  */
/* to NULL. Returns NULL, leaving *ppszNextToken untouched, when pszCurPtr */
/* is NULL or holds nothing but spaces.                                    */
static char* VSICurlGetToken(char* pszCurPtr, char** ppszNextToken)
{
    if (pszCurPtr == NULL)
        return NULL;

    /* Start of the token : first character that is not a space */
    char* pszTokenStart = pszCurPtr + strspn(pszCurPtr, " ");
    if (*pszTokenStart == '\0')
        return NULL;

    /* End of the token : next space, or the end of the string */
    char* pszTokenEnd = pszTokenStart + strcspn(pszTokenStart, " ");
    if (*pszTokenEnd == '\0')
    {
        *ppszNextToken = NULL;
        return pszTokenStart;
    }

    *pszTokenEnd = '\0';
    pszTokenEnd ++;
    *ppszNextToken = pszTokenEnd + strspn(pszTokenEnd, " ");

    return pszTokenStart;
}
2170 :
2171 : /************************************************************************/
2172 : /* VSICurlParseFullFTPLine() */
2173 : /************************************************************************/
2174 :
2175 : /* Parse lines like the following ones :
2176 : -rw-r--r-- 1 10003 100 430 Jul 04 2008 COPYING
2177 : lrwxrwxrwx 1 ftp ftp 28 Jun 14 14:13 MPlayer -> mirrors/mplayerhq.hu/MPlayer
2178 : -rw-r--r-- 1 ftp ftp 725614592 May 13 20:13 Fedora-15-x86_64-Live-KDE.iso
2179 : drwxr-xr-x 280 1003 1003 6656 Aug 26 04:17 gnu
2180 : */
2181 :
2182 4 : static int VSICurlParseFullFTPLine(char* pszLine,
2183 : char*& pszFilename,
2184 : int& bSizeValid,
2185 : GUIntBig& nSize,
2186 : int& bIsDirectory,
2187 : GIntBig& nUnixTime)
2188 : {
2189 4 : char* pszNextToken = pszLine;
2190 4 : char* pszPermissions = VSICurlGetToken(pszNextToken, &pszNextToken);
2191 4 : if (pszPermissions == NULL || strlen(pszPermissions) != 10)
2192 0 : return FALSE;
2193 4 : bIsDirectory = (pszPermissions[0] == 'd');
2194 :
2195 : int i;
2196 16 : for(i = 0; i < 3; i++)
2197 : {
2198 12 : if (VSICurlGetToken(pszNextToken, &pszNextToken) == NULL)
2199 0 : return FALSE;
2200 : }
2201 :
2202 4 : char* pszSize = VSICurlGetToken(pszNextToken, &pszNextToken);
2203 4 : if (pszSize == NULL)
2204 0 : return FALSE;
2205 :
2206 4 : if (pszPermissions[0] == '-')
2207 : {
2208 : /* Regular file */
2209 4 : bSizeValid = TRUE;
2210 4 : nSize = CPLScanUIntBig(pszSize, strlen(pszSize));
2211 : }
2212 :
2213 : struct tm brokendowntime;
2214 4 : memset(&brokendowntime, 0, sizeof(brokendowntime));
2215 4 : int bBrokenDownTimeValid = TRUE;
2216 :
2217 4 : char* pszMonth = VSICurlGetToken(pszNextToken, &pszNextToken);
2218 4 : if (pszMonth == NULL || strlen(pszMonth) != 3)
2219 0 : return FALSE;
2220 :
2221 40 : for(i = 0; i < 12; i++)
2222 : {
2223 40 : if (EQUALN(pszMonth, apszMonths[i], 3))
2224 4 : break;
2225 : }
2226 4 : if (i < 12)
2227 4 : brokendowntime.tm_mon = i;
2228 : else
2229 0 : bBrokenDownTimeValid = FALSE;
2230 :
2231 4 : char* pszDay = VSICurlGetToken(pszNextToken, &pszNextToken);
2232 4 : if (pszDay == NULL || (strlen(pszDay) != 1 && strlen(pszDay) != 2))
2233 0 : return FALSE;
2234 4 : int nDay = atoi(pszDay);
2235 8 : if (nDay >= 1 && nDay <= 31)
2236 4 : brokendowntime.tm_mday = nDay;
2237 : else
2238 0 : bBrokenDownTimeValid = FALSE;
2239 :
2240 4 : char* pszHourOrYear = VSICurlGetToken(pszNextToken, &pszNextToken);
2241 4 : if (pszHourOrYear == NULL || (strlen(pszHourOrYear) != 4 && strlen(pszHourOrYear) != 5))
2242 0 : return FALSE;
2243 4 : if (strlen(pszHourOrYear) == 4)
2244 : {
2245 4 : brokendowntime.tm_year = atoi(pszHourOrYear) - 1900;
2246 : }
2247 : else
2248 : {
2249 : time_t sTime;
2250 0 : time(&sTime);
2251 : struct tm currentBrokendowntime;
2252 0 : CPLUnixTimeToYMDHMS((GIntBig)sTime, ¤tBrokendowntime);
2253 0 : brokendowntime.tm_year = currentBrokendowntime.tm_year;
2254 0 : brokendowntime.tm_hour = atoi(pszHourOrYear);
2255 0 : brokendowntime.tm_min = atoi(pszHourOrYear + 3);
2256 : }
2257 :
2258 4 : if (bBrokenDownTimeValid)
2259 4 : nUnixTime = CPLYMDHMSToUnixTime(&brokendowntime);
2260 : else
2261 0 : nUnixTime = 0;
2262 :
2263 4 : if (pszNextToken == NULL)
2264 0 : return FALSE;
2265 :
2266 4 : pszFilename = pszNextToken;
2267 :
2268 4 : char* pszCurPtr = pszFilename;
2269 66 : while( *pszCurPtr != '\0')
2270 : {
2271 : /* In case of a link, stop before the pointed part of the link */
2272 58 : if (pszPermissions[0] == 'l' && strncmp(pszCurPtr, " -> ", 4) == 0)
2273 : {
2274 0 : break;
2275 : }
2276 58 : pszCurPtr ++;
2277 : }
2278 4 : *pszCurPtr = '\0';
2279 :
2280 4 : return TRUE;
2281 : }
2282 :
2283 : /************************************************************************/
2284 : /* GetFileList() */
2285 : /************************************************************************/
2286 :
2287 34 : char** VSICurlFilesystemHandler::GetFileList(const char *pszDirname, int* pbGotFileList)
2288 : {
2289 : if (ENABLE_DEBUG)
2290 34 : CPLDebug("VSICURL", "GetFileList(%s)" , pszDirname);
2291 :
2292 34 : *pbGotFileList = FALSE;
2293 :
2294 : /* HACK (optimization in fact) for MBTiles driver */
2295 34 : if (strstr(pszDirname, ".tiles.mapbox.com") != NULL)
2296 2 : return NULL;
2297 :
2298 32 : if (strncmp(pszDirname, "/vsicurl/ftp", strlen("/vsicurl/ftp")) == 0)
2299 : {
2300 : WriteFuncStruct sWriteFuncData;
2301 2 : sWriteFuncData.pBuffer = NULL;
2302 :
2303 2 : CPLString osDirname(pszDirname + strlen("/vsicurl/"));
2304 2 : osDirname += '/';
2305 :
2306 2 : char** papszFileList = NULL;
2307 :
2308 2 : for(int iTry=0;iTry<2;iTry++)
2309 : {
2310 2 : CURL* hCurlHandle = GetCurlHandleFor(osDirname);
2311 2 : VSICurlSetOptions(hCurlHandle, osDirname.c_str());
2312 :
2313 : /* On the first pass, we want to try fetching all the possible */
2314 : /* informations (filename, file/directory, size). If that */
2315 : /* does not work, then try again with CURLOPT_DIRLISTONLY set */
2316 2 : if (iTry == 1)
2317 : {
2318 : /* 7.16.4 */
2319 : #if LIBCURL_VERSION_NUM <= 0x071004
2320 : curl_easy_setopt(hCurlHandle, CURLOPT_FTPLISTONLY, 1);
2321 : #elif LIBCURL_VERSION_NUM > 0x071004
2322 0 : curl_easy_setopt(hCurlHandle, CURLOPT_DIRLISTONLY, 1);
2323 : #endif
2324 : }
2325 :
2326 2 : VSICURLInitWriteFuncStruct(&sWriteFuncData, NULL, NULL, NULL);
2327 2 : curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData);
2328 2 : curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION, VSICurlHandleWriteFunc);
2329 :
2330 : char szCurlErrBuf[CURL_ERROR_SIZE+1];
2331 2 : szCurlErrBuf[0] = '\0';
2332 2 : curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER, szCurlErrBuf );
2333 :
2334 2 : curl_easy_perform(hCurlHandle);
2335 :
2336 2 : if (sWriteFuncData.pBuffer == NULL)
2337 0 : return NULL;
2338 :
2339 2 : char* pszLine = sWriteFuncData.pBuffer;
2340 : char* c;
2341 2 : int nCount = 0;
2342 :
2343 2 : if (EQUALN(pszLine, "<!DOCTYPE HTML", strlen("<!DOCTYPE HTML")) ||
2344 : EQUALN(pszLine, "<HTML>", 6))
2345 : {
2346 : papszFileList = ParseHTMLFileList(pszDirname,
2347 : sWriteFuncData.pBuffer,
2348 0 : pbGotFileList);
2349 0 : break;
2350 : }
2351 2 : else if (iTry == 0)
2352 : {
2353 2 : CPLStringList oFileList;
2354 2 : *pbGotFileList = TRUE;
2355 :
2356 8 : while( (c = strchr(pszLine, '\n')) != NULL)
2357 : {
2358 4 : *c = 0;
2359 4 : if (c - pszLine > 0 && c[-1] == '\r')
2360 0 : c[-1] = 0;
2361 :
2362 4 : char* pszFilename = NULL;
2363 4 : int bSizeValid = FALSE;
2364 4 : GUIntBig nFileSize = 0;
2365 4 : int bIsDirectory = FALSE;
2366 4 : GIntBig mUnixTime = 0;
2367 4 : if (!VSICurlParseFullFTPLine(pszLine, pszFilename,
2368 : bSizeValid, nFileSize,
2369 : bIsDirectory, mUnixTime))
2370 0 : break;
2371 :
2372 4 : if (strcmp(pszFilename, ".") != 0 &&
2373 : strcmp(pszFilename, "..") != 0)
2374 : {
2375 : CPLString osCachedFilename =
2376 4 : CPLSPrintf("%s/%s", pszDirname + strlen("/vsicurl/"), pszFilename);
2377 4 : CachedFileProp* cachedFileProp = GetCachedFileProp(osCachedFilename);
2378 4 : cachedFileProp->eExists = EXIST_YES;
2379 4 : cachedFileProp->bHastComputedFileSize = bSizeValid;
2380 4 : cachedFileProp->fileSize = nFileSize;
2381 4 : cachedFileProp->bIsDirectory = bIsDirectory;
2382 4 : cachedFileProp->mTime = mUnixTime;
2383 :
2384 4 : oFileList.AddString(pszFilename);
2385 : if (ENABLE_DEBUG)
2386 : {
2387 : struct tm brokendowntime;
2388 4 : CPLUnixTimeToYMDHMS(mUnixTime, &brokendowntime);
2389 : CPLDebug("VSICURL", "File[%d] = %s, is_dir = %d, size = " CPL_FRMT_GUIB ", time = %04d/%02d/%02d %02d:%02d:%02d",
2390 : nCount, pszFilename, bIsDirectory, nFileSize,
2391 : brokendowntime.tm_year + 1900, brokendowntime.tm_mon + 1, brokendowntime.tm_mday,
2392 4 : brokendowntime.tm_hour, brokendowntime.tm_min, brokendowntime.tm_sec);
2393 : }
2394 :
2395 4 : nCount ++;
2396 : }
2397 :
2398 4 : pszLine = c + 1;
2399 : }
2400 :
2401 2 : if (c == NULL)
2402 : {
2403 2 : papszFileList = oFileList.StealList();
2404 : break;
2405 0 : }
2406 : }
2407 : else
2408 : {
2409 0 : CPLStringList oFileList;
2410 0 : *pbGotFileList = TRUE;
2411 :
2412 0 : while( (c = strchr(pszLine, '\n')) != NULL)
2413 : {
2414 0 : *c = 0;
2415 0 : if (c - pszLine > 0 && c[-1] == '\r')
2416 0 : c[-1] = 0;
2417 :
2418 0 : if (strcmp(pszLine, ".") != 0 &&
2419 : strcmp(pszLine, "..") != 0)
2420 : {
2421 0 : oFileList.AddString(pszLine);
2422 : if (ENABLE_DEBUG)
2423 0 : CPLDebug("VSICURL", "File[%d] = %s", nCount, pszLine);
2424 0 : nCount ++;
2425 : }
2426 :
2427 0 : pszLine = c + 1;
2428 : }
2429 :
2430 0 : papszFileList = oFileList.StealList();
2431 : }
2432 :
2433 0 : CPLFree(sWriteFuncData.pBuffer);
2434 0 : sWriteFuncData.pBuffer = NULL;
2435 : }
2436 :
2437 2 : CPLFree(sWriteFuncData.pBuffer);
2438 :
2439 2 : return papszFileList;
2440 : }
2441 :
2442 : /* Try to recognize HTML pages that list the content of a directory */
2443 : /* Currently this supports what Apache and shttpd can return */
2444 30 : else if (strncmp(pszDirname, "/vsicurl/http://", strlen("/vsicurl/http://")) == 0 ||
2445 : strncmp(pszDirname, "/vsicurl/https://", strlen("/vsicurl/https://")) == 0)
2446 : {
2447 : WriteFuncStruct sWriteFuncData;
2448 :
2449 30 : CPLString osDirname(pszDirname + strlen("/vsicurl/"));
2450 30 : osDirname += '/';
2451 :
2452 : #if LIBCURL_VERSION_NUM < 0x070B00
2453 : /* Curl 7.10.X doesn't manage to unset the CURLOPT_RANGE that would have been */
2454 : /* previously set, so we have to reinit the connection handle */
2455 : GetCurlHandleFor("");
2456 : #endif
2457 :
2458 30 : CURL* hCurlHandle = GetCurlHandleFor(osDirname);
2459 30 : VSICurlSetOptions(hCurlHandle, osDirname.c_str());
2460 :
2461 30 : curl_easy_setopt(hCurlHandle, CURLOPT_RANGE, NULL);
2462 :
2463 30 : VSICURLInitWriteFuncStruct(&sWriteFuncData, NULL, NULL, NULL);
2464 30 : curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData);
2465 30 : curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION, VSICurlHandleWriteFunc);
2466 :
2467 : char szCurlErrBuf[CURL_ERROR_SIZE+1];
2468 30 : szCurlErrBuf[0] = '\0';
2469 30 : curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER, szCurlErrBuf );
2470 :
2471 30 : curl_easy_perform(hCurlHandle);
2472 :
2473 30 : if (sWriteFuncData.pBuffer == NULL)
2474 0 : return NULL;
2475 :
2476 : char** papszFileList = ParseHTMLFileList(pszDirname,
2477 : sWriteFuncData.pBuffer,
2478 30 : pbGotFileList);
2479 :
2480 30 : CPLFree(sWriteFuncData.pBuffer);
2481 30 : return papszFileList;
2482 : }
2483 :
2484 0 : return NULL;
2485 : }
2486 :
2487 : /************************************************************************/
2488 : /* Stat() */
2489 : /************************************************************************/
2490 :
2491 1484 : int VSICurlFilesystemHandler::Stat( const char *pszFilename, VSIStatBufL *pStatBuf,
2492 : int nFlags )
2493 : {
2494 1484 : CPLString osFilename(pszFilename);
2495 :
2496 1484 : memset(pStatBuf, 0, sizeof(VSIStatBufL));
2497 :
2498 : const char* pszOptionVal =
2499 1484 : CPLGetConfigOption( "GDAL_DISABLE_READDIR_ON_OPEN", "NO" );
2500 : int bSkipReadDir = EQUAL(pszOptionVal, "EMPTY_DIR") ||
2501 1484 : CSLTestBoolean(pszOptionVal);
2502 :
2503 : /* Does it look like a FTP directory ? */
2504 1488 : if (strncmp(osFilename, "/vsicurl/ftp", strlen("/vsicurl/ftp")) == 0 &&
2505 4 : pszFilename[strlen(osFilename) - 1] == '/' && !bSkipReadDir)
2506 : {
2507 0 : char** papszFileList = ReadDir(osFilename);
2508 0 : if (papszFileList)
2509 : {
2510 0 : pStatBuf->st_mode = S_IFDIR;
2511 0 : pStatBuf->st_size = 0;
2512 :
2513 0 : CSLDestroy(papszFileList);
2514 :
2515 0 : return 0;
2516 : }
2517 0 : return -1;
2518 : }
2519 1484 : else if (strchr(CPLGetFilename(osFilename), '.') != NULL &&
2520 : strncmp(CPLGetExtension(osFilename), "zip", 3) != 0 &&
2521 : !bSkipReadDir)
2522 : {
2523 : int bGotFileList;
2524 1372 : char** papszFileList = ReadDir(CPLGetDirname(osFilename), &bGotFileList);
2525 1372 : int bFound = (VSICurlIsFileInList(papszFileList, CPLGetFilename(osFilename)) != -1);
2526 1372 : CSLDestroy(papszFileList);
2527 1372 : if (bGotFileList && !bFound)
2528 : {
2529 28 : return -1;
2530 : }
2531 : }
2532 :
2533 1456 : VSICurlHandle oHandle( this, osFilename + strlen("/vsicurl/"));
2534 :
2535 1456 : if ( oHandle.IsKnownFileSize() ||
2536 : ((nFlags & VSI_STAT_SIZE_FLAG) && !oHandle.IsDirectory() &&
2537 : CSLTestBoolean(CPLGetConfigOption("CPL_VSIL_CURL_SLOW_GET_SIZE", "YES"))) )
2538 44 : pStatBuf->st_size = oHandle.GetFileSize();
2539 :
2540 1456 : int nRet = (oHandle.Exists()) ? 0 : -1;
2541 1456 : pStatBuf->st_mtime = oHandle.GetMTime();
2542 1456 : pStatBuf->st_mode = oHandle.IsDirectory() ? S_IFDIR : S_IFREG;
2543 1456 : return nRet;
2544 : }
2545 :
2546 : /************************************************************************/
2547 : /* Unlink() */
2548 : /************************************************************************/
2549 :
2550 0 : int VSICurlFilesystemHandler::Unlink( const char *pszFilename )
2551 : {
2552 0 : return -1;
2553 : }
2554 :
2555 : /************************************************************************/
2556 : /* Rename() */
2557 : /************************************************************************/
2558 :
2559 0 : int VSICurlFilesystemHandler::Rename( const char *oldpath, const char *newpath )
2560 : {
2561 0 : return -1;
2562 : }
2563 :
2564 : /************************************************************************/
2565 : /* Mkdir() */
2566 : /************************************************************************/
2567 :
2568 0 : int VSICurlFilesystemHandler::Mkdir( const char *pszDirname, long nMode )
2569 : {
2570 0 : return -1;
2571 : }
2572 : /************************************************************************/
2573 : /* Rmdir() */
2574 : /************************************************************************/
2575 :
2576 0 : int VSICurlFilesystemHandler::Rmdir( const char *pszDirname )
2577 : {
2578 0 : return -1;
2579 : }
2580 :
2581 : /************************************************************************/
2582 : /* ReadDir() */
2583 : /************************************************************************/
2584 :
2585 2868 : char** VSICurlFilesystemHandler::ReadDir( const char *pszDirname, int* pbGotFileList )
2586 : {
2587 2868 : CPLString osDirname(pszDirname);
2588 5736 : while (osDirname[strlen(osDirname) - 1] == '/')
2589 0 : osDirname.erase(strlen(osDirname) - 1);
2590 :
2591 2868 : const char* pszUpDir = strstr(osDirname, "/..");
2592 2868 : if (pszUpDir != NULL)
2593 : {
2594 0 : int pos = pszUpDir - osDirname.c_str() - 1;
2595 0 : while(pos >= 0 && osDirname[pos] != '/')
2596 0 : pos --;
2597 0 : if (pos >= 1)
2598 : {
2599 0 : osDirname = osDirname.substr(0, pos) + CPLString(pszUpDir + 3);
2600 : }
2601 : }
2602 :
2603 2868 : CPLMutexHolder oHolder( &hMutex );
2604 :
2605 : /* If we know the file exists and is not a directory, then don't try to list its content */
2606 2868 : CachedFileProp* cachedFileProp = GetCachedFileProp(osDirname.c_str() + strlen("/vsicurl/"));
2607 2868 : if (cachedFileProp->eExists == EXIST_YES && !cachedFileProp->bIsDirectory)
2608 : {
2609 1344 : if (pbGotFileList)
2610 0 : *pbGotFileList = TRUE;
2611 1344 : return NULL;
2612 : }
2613 :
2614 1524 : CachedDirList* psCachedDirList = cacheDirList[osDirname];
2615 1524 : if (psCachedDirList == NULL)
2616 : {
2617 34 : psCachedDirList = (CachedDirList*) CPLMalloc(sizeof(CachedDirList));
2618 34 : psCachedDirList->papszFileList = GetFileList(osDirname, &psCachedDirList->bGotFileList);
2619 34 : cacheDirList[osDirname] = psCachedDirList;
2620 : }
2621 :
2622 1524 : if (pbGotFileList)
2623 1466 : *pbGotFileList = psCachedDirList->bGotFileList;
2624 :
2625 1524 : return CSLDuplicate(psCachedDirList->papszFileList);
2626 : }
2627 :
2628 : /************************************************************************/
2629 : /* ReadDir() */
2630 : /************************************************************************/
2631 :
2632 1402 : char** VSICurlFilesystemHandler::ReadDir( const char *pszDirname )
2633 : {
2634 1402 : return ReadDir(pszDirname, NULL);
2635 : }
2636 :
2637 : /************************************************************************/
2638 : /* VSIInstallCurlFileHandler() */
2639 : /************************************************************************/
2640 :
2641 : /**
2642 : * \brief Install /vsicurl/ HTTP/FTP file system handler (requires libcurl)
2643 : *
2644 : * A special file handler is installed that allows reading on-the-fly of files
2645 : * available through HTTP/FTP web protocols, without downloading the entire file.
2646 : *
2647 : * Recognized filenames are of the form /vsicurl/http://path/to/remote/ressource or
2648 : * /vsicurl/ftp://path/to/remote/ressource where path/to/remote/ressource is the
2649 : * URL of a remote ressource.
2650 : *
2651 : * Partial downloads (requires the HTTP server to support random reading) are done
2652 : * with a 16 KB granularity by default. If the driver detects sequential reading
2653 : * it will progressively increase the chunk size up to 2 MB to improve download
2654 : * performance.
2655 : *
2656 : * The GDAL_HTTP_PROXY and GDAL_HTTP_PROXYUSERPWD configuration options can be
2657 : * used to define a proxy server. The syntax to use is the one of Curl CURLOPT_PROXY
2658 : * and CURLOPT_PROXYUSERPWD options.
2659 : *
2660 : * VSIStatL() will return the size in st_size member and file
2661 : * nature- file or directory - in st_mode member (the later only reliable with FTP
2662 : * resources for now).
2663 : *
2664 : * VSIReadDir() should be able to parse the HTML directory listing returned by the
2665 : * most popular web servers, such as Apache or Microsoft IIS.
2666 : *
2667 : * This special file handler can be combined with other virtual filesystems handlers,
2668 : * such as /vsizip. For example, /vsizip//vsicurl/path/to/remote/file.zip/path/inside/zip
2669 : *
2670 : * @since GDAL 1.8.0
2671 : */
2672 1341 : void VSIInstallCurlFileHandler(void)
2673 : {
2674 1341 : VSIFileManager::InstallHandler( "/vsicurl/", new VSICurlFilesystemHandler );
2675 1341 : }
2676 :
2677 : /************************************************************************/
2678 : /* VSICurlInstallReadCbk() */
2679 : /************************************************************************/
2680 :
2681 2 : int VSICurlInstallReadCbk (VSILFILE* fp,
2682 : VSICurlReadCbkFunc pfnReadCbk,
2683 : void* pfnUserData,
2684 : int bStopOnInterrruptUntilUninstall)
2685 : {
2686 : return ((VSICurlHandle*)fp)->InstallReadCbk(pfnReadCbk, pfnUserData,
2687 2 : bStopOnInterrruptUntilUninstall);
2688 : }
2689 :
2690 :
2691 : /************************************************************************/
2692 : /* VSICurlUninstallReadCbk() */
2693 : /************************************************************************/
2694 :
2695 2 : int VSICurlUninstallReadCbk(VSILFILE* fp)
2696 : {
2697 2 : return ((VSICurlHandle*)fp)->UninstallReadCbk();
2698 : }
2699 :
2700 : #endif /* HAVE_CURL */
|