LCOV - code coverage report
Current view: directory - port - cpl_recode_iconv.cpp (source / functions) Found Hit Coverage
Test: gdal_filtered.info Lines: 82 15 18.3 %
Date: 2012-04-28 Functions: 3 1 33.3 %

       1                 : /**********************************************************************
       2                 :  * $Id: cpl_recode_iconv.cpp 23653 2011-12-29 14:27:11Z rouault $
       3                 :  *
       4                 :  * Name:     cpl_recode_iconv.cpp
       5                 :  * Project:  CPL - Common Portability Library
       6                 :  * Purpose:  Character set recoding and char/wchar_t conversions implemented
       7                 :  *           using the iconv() functionality.
       8                 :  * Author:   Andrey Kiselev, dron@ak4719.spb.edu
       9                 :  *
      10                 :  **********************************************************************
      11                 :  * Copyright (c) 2011, Andrey Kiselev <dron@ak4719.spb.edu>
      12                 :  *
      13                 :  * Permission to use, copy, modify, and distribute this software for any
      14                 :  * purpose with or without fee is hereby granted, provided that the above
      15                 :  * copyright notice and this permission notice appear in all copies.
      16                 :  *
      17                 :  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
      18                 :  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
      19                 :  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
      20                 :  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
      21                 :  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
      22                 :  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
      23                 :  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
      24                 :  **********************************************************************/
      25                 : 
      26                 : #include "cpl_port.h"
      27                 : 
      28                 : CPL_CVSID("$Id: cpl_recode_iconv.cpp 23653 2011-12-29 14:27:11Z rouault $");
      29                 : 
      30                 : #ifdef CPL_RECODE_ICONV
      31                 : 
      32                 : #include <iconv.h>
      33                 : #include "cpl_string.h"
      34                 : 
      35                 : #ifndef ICONV_CPP_CONST
      36                 : #define ICONV_CPP_CONST ICONV_CONST
      37                 : #endif
      38                 : 
      39                 : #define CPL_RECODE_DSTBUF_SIZE 32768
      40                 : 
      41                 : /************************************************************************/
      42                 : /*                          CPLRecodeIconv()                            */
      43                 : /************************************************************************/
      44                 : 
      45                 : /**
      46                 :  * Convert a string from a source encoding to a destination encoding
      47                 :  * using the iconv() function.
      48                 :  *
      49                 :  * If an error occurs an error may, or may not be posted with CPLError(). 
      50                 :  *
      51                 :  * @param pszSource a NULL terminated string.
      52                 :  * @param pszSrcEncoding the source encoding.
      53                 :  * @param pszDstEncoding the destination encoding.
      54                 :  *
      55                 :  * @return a NULL terminated string which should be freed with CPLFree().
      56                 :  */
      57                 : 
      58             644 : char *CPLRecodeIconv( const char *pszSource, 
      59                 :                       const char *pszSrcEncoding, 
      60                 :                       const char *pszDstEncoding )
      61                 : 
      62                 : {
      63                 :     iconv_t sConv;
      64                 : 
      65             644 :     sConv = iconv_open( pszDstEncoding, pszSrcEncoding );
      66                 : 
      67             644 :     if ( sConv == (iconv_t)-1 )
      68                 :     {
      69                 :         CPLError( CE_Warning, CPLE_AppDefined, 
      70                 :                   "Recode from %s to %s failed with the error: \"%s\".", 
      71               0 :                   pszSrcEncoding, pszDstEncoding, strerror(errno) );
      72                 : 
      73               0 :         return CPLStrdup(pszSource);
      74                 :     }
      75                 : 
      76                 : /* -------------------------------------------------------------------- */
      77                 : /*      XXX: There is a portability issue: iconv() function could be    */
      78                 : /*      declared differently on different platforms. The second         */
      79                 : /*      argument could be declared as char** (as POSIX defines) or      */
      80                 : /*      as a const char**. Handle it with the ICONV_CPP_CONST macro here.   */
      81                 : /* -------------------------------------------------------------------- */
      82             644 :     ICONV_CPP_CONST char *pszSrcBuf = (ICONV_CPP_CONST char *)pszSource;
      83             644 :     size_t  nSrcLen = strlen( pszSource );
      84             644 :     size_t  nDstCurLen = MAX(CPL_RECODE_DSTBUF_SIZE, nSrcLen + 1);
      85             644 :     size_t  nDstLen = nDstCurLen;
      86             644 :     char    *pszDestination = (char *)CPLCalloc( nDstCurLen, sizeof(char) );
      87             644 :     char    *pszDstBuf = pszDestination;
      88                 : 
      89            1932 :     while ( nSrcLen > 0 )
      90                 :     {
      91                 :         size_t  nConverted =
      92             644 :             iconv( sConv, &pszSrcBuf, &nSrcLen, &pszDstBuf, &nDstLen );
      93                 : 
      94             644 :         if ( nConverted == (size_t)-1 )
      95                 :         {
      96               0 :             if ( errno == EILSEQ )
      97                 :             {
      98                 :                 // Skip the invalid sequence in the input string.
      99                 :                 static int bHasWarned = FALSE;
     100               0 :                 if (!bHasWarned)
     101                 :                 {
     102               0 :                     bHasWarned = TRUE;
     103                 :                     CPLError(CE_Warning, CPLE_AppDefined,
     104                 :                             "One or several characters couldn't be converted correctly from %s to %s.\n"
     105                 :                             "This warning will not be emitted anymore",
     106               0 :                              pszSrcEncoding, pszDstEncoding);
     107                 :                 }
     108               0 :                 nSrcLen--, pszSrcBuf++;
     109               0 :                 continue;
     110                 :             }
     111                 : 
     112               0 :             else if ( errno == E2BIG )
     113                 :             {
     114                 :                 // We are running out of the output buffer.
     115                 :                 // Dynamically increase the buffer size.
     116               0 :                 size_t nTmp = nDstCurLen;
     117               0 :                 nDstCurLen *= 2;
     118                 :                 pszDestination =
     119               0 :                     (char *)CPLRealloc( pszDestination, nDstCurLen );
     120               0 :                 pszDstBuf = pszDestination + nTmp - nDstLen;
     121               0 :                 nDstLen += nDstCurLen - nTmp;
     122               0 :                 continue;
     123                 :             }
     124                 : 
     125                 :             else
     126               0 :                 break;
     127                 :         }
     128                 :     }
     129                 : 
     130             644 :     pszDestination[nDstCurLen - nDstLen] = '\0';
     131                 : 
     132             644 :     iconv_close( sConv );
     133                 : 
     134             644 :     return pszDestination;
     135                 : }
     136                 : 
     137                 : /************************************************************************/
     138                 : /*                      CPLRecodeFromWCharIconv()                       */
     139                 : /************************************************************************/
     140                 : 
     141                 : /**
     142                 :  * Convert wchar_t string to UTF-8. 
     143                 :  *
     144                 :  * Convert a wchar_t string into a multibyte utf-8 string
     145                 :  * using the iconv() function.
     146                 :  *
     147                 :  * Note that the wchar_t type varies in size on different systems. On
     148                 :  * win32 it is normally 2 bytes, and on unix 4 bytes.
     149                 :  *
     150                 :  * If an error occurs an error may, or may not be posted with CPLError(). 
     151                 :  *
     152                 :  * @param pwszSource the source wchar_t string, terminated with a 0 wchar_t.
     153                 :  * @param pszSrcEncoding the source encoding, typically CPL_ENC_UCS2.
     154                 :  * @param pszDstEncoding the destination encoding, typically CPL_ENC_UTF8.
     155                 :  *
     156                 :  * @return a zero terminated multi-byte string which should be freed with 
     157                 :  * CPLFree(), or NULL if an error occurs. 
     158                 :  */
     159                 : 
     160               0 : char *CPLRecodeFromWCharIconv( const wchar_t *pwszSource, 
     161                 :                                const char *pszSrcEncoding, 
     162                 :                                const char *pszDstEncoding )
     163                 : 
     164                 : {
     165                 : /* -------------------------------------------------------------------- */
     166                 : /*      What is the source length.                                      */
     167                 : /* -------------------------------------------------------------------- */
     168               0 :     size_t  nSrcLen = 0;
     169                 : 
     170               0 :     while ( pwszSource[nSrcLen] != 0 )
     171               0 :         nSrcLen++;
     172                 : 
     173                 : /* -------------------------------------------------------------------- */
     174                 : /*      iconv() does not support wchar_t so we need to repack the       */
     175                 : /*      characters according to the width of a character in the         */
     176                 : /*      source encoding.  For instance if wchar_t is 4 bytes but our    */
     177                 : /*      source is UTF16 then we need to pack down into 2 byte           */
     178                 : /*      characters before passing to iconv().                           */
     179                 : /* -------------------------------------------------------------------- */
     180               0 :     int nTargetCharWidth = CPLEncodingCharSize( pszSrcEncoding );
     181                 : 
     182               0 :     if( nTargetCharWidth < 1 )
     183                 :     {
     184                 :         CPLError( CE_Warning, CPLE_AppDefined,
     185                 :                   "Recode from %s with CPLRecodeFromWChar() failed because"
     186                 :                   " the width of characters in the encoding are not known.",
     187               0 :                   pszSrcEncoding );
     188               0 :         return CPLStrdup("");
     189                 :     }
     190                 : 
     191               0 :     GByte *pszIconvSrcBuf = (GByte*) CPLCalloc((nSrcLen+1),nTargetCharWidth);
     192                 :     unsigned int iSrc;
     193                 : 
     194               0 :     for( iSrc = 0; iSrc <= nSrcLen; iSrc++ )
     195                 :     {
     196               0 :         if( nTargetCharWidth == 1 )
     197               0 :             pszIconvSrcBuf[iSrc] = (GByte) pwszSource[iSrc];
     198               0 :         else if( nTargetCharWidth == 2 )
     199               0 :             ((short *)pszIconvSrcBuf)[iSrc] = (short) pwszSource[iSrc];
     200               0 :         else if( nTargetCharWidth == 4 )
     201               0 :             ((GInt32 *)pszIconvSrcBuf)[iSrc] = pwszSource[iSrc];
     202                 :     }
     203                 : 
     204                 : /* -------------------------------------------------------------------- */
     205                 : /*      Create the iconv() translation object.                          */
     206                 : /* -------------------------------------------------------------------- */
     207                 :     iconv_t sConv;
     208                 : 
     209               0 :     sConv = iconv_open( pszDstEncoding, pszSrcEncoding );
     210                 : 
     211               0 :     if ( sConv == (iconv_t)-1 )
     212                 :     {
     213               0 :         CPLFree( pszIconvSrcBuf );
     214                 :         CPLError( CE_Warning, CPLE_AppDefined, 
     215                 :                   "Recode from %s to %s failed with the error: \"%s\".", 
     216               0 :                   pszSrcEncoding, pszDstEncoding, strerror(errno) );
     217                 : 
     218               0 :         return CPLStrdup( "" );
     219                 :     }
     220                 : 
     221                 : /* -------------------------------------------------------------------- */
     222                 : /*      XXX: There is a portability issue: iconv() function could be    */
     223                 : /*      declared differently on different platforms. The second         */
     224                 : /*      argument could be declared as char** (as POSIX defines) or      */
     225                 : /*      as a const char**. Handle it with the ICONV_CPP_CONST macro here.   */
     226                 : /* -------------------------------------------------------------------- */
     227               0 :     ICONV_CPP_CONST char *pszSrcBuf = (ICONV_CPP_CONST char *) pszIconvSrcBuf;
     228                 : 
     229                 :     /* iconv expects a number of bytes, not characters */
     230               0 :     nSrcLen *= sizeof(wchar_t);
     231                 : 
     232                 : /* -------------------------------------------------------------------- */
     233                 : /*      Allocate destination buffer.                                    */
     234                 : /* -------------------------------------------------------------------- */
     235               0 :     size_t  nDstCurLen = MAX(CPL_RECODE_DSTBUF_SIZE, nSrcLen + 1);
     236               0 :     size_t  nDstLen = nDstCurLen;
     237               0 :     char    *pszDestination = (char *)CPLCalloc( nDstCurLen, sizeof(char) );
     238               0 :     char    *pszDstBuf = pszDestination;
     239                 : 
     240               0 :     while ( nSrcLen > 0 )
     241                 :     {
     242                 :         size_t  nConverted =
     243               0 :             iconv( sConv, &pszSrcBuf, &nSrcLen, &pszDstBuf, &nDstLen );
     244                 : 
     245               0 :         if ( nConverted == (size_t)-1 )
     246                 :         {
     247               0 :             if ( errno == EILSEQ )
     248                 :             {
     249                 :                 // Skip the invalid sequence in the input string.
     250               0 :                 nSrcLen--;
     251               0 :                 pszSrcBuf += sizeof(wchar_t);
     252                 :                 static int bHasWarned = FALSE;
     253               0 :                 if (!bHasWarned)
     254                 :                 {
     255               0 :                     bHasWarned = TRUE;
     256                 :                     CPLError(CE_Warning, CPLE_AppDefined,
     257                 :                             "One or several characters couldn't be converted correctly from %s to %s.\n"
     258                 :                             "This warning will not be emitted anymore",
     259               0 :                              pszSrcEncoding, pszDstEncoding);
     260                 :                 }
     261               0 :                 continue;
     262                 :             }
     263                 : 
     264               0 :             else if ( errno == E2BIG )
     265                 :             {
     266                 :                 // We are running out of the output buffer.
     267                 :                 // Dynamically increase the buffer size.
     268               0 :                 size_t nTmp = nDstCurLen;
     269               0 :                 nDstCurLen *= 2;
     270                 :                 pszDestination =
     271               0 :                     (char *)CPLRealloc( pszDestination, nDstCurLen );
     272               0 :                 pszDstBuf = pszDestination + nTmp - nDstLen;
     273               0 :                 nDstLen += nDstCurLen - nTmp;
     274               0 :                 continue;
     275                 :             }
     276                 : 
     277                 :             else
     278               0 :                 break;
     279                 :         }
     280                 :     }
     281                 : 
     282               0 :     pszDestination[nDstCurLen - nDstLen] = '\0';
     283                 : 
     284               0 :     iconv_close( sConv );
     285                 : 
     286               0 :     CPLFree( pszIconvSrcBuf );
     287                 : 
     288               0 :     return pszDestination;
     289                 : }
     290                 : 
     291                 : /************************************************************************/
     292                 : /*                        CPLRecodeToWCharIconv()                       */
     293                 : /************************************************************************/
     294                 : 
     295                 : /**
     296                 :  * Convert UTF-8 string to a wchar_t string.
     297                 :  *
     298                 :  * Convert a 8bit, multi-byte per character input string into a wide
     299                 :  * character (wchar_t) string using the iconv() function.
     300                 :  *
     301                 :  * Note that the wchar_t type varies in size on different systems. On
     302                 :  * win32 it is normally 2 bytes, and on unix 4 bytes.
     303                 :  *
     304                 :  * If an error occurs an error may, or may not be posted with CPLError(). 
     305                 :  *
     306                 :  * @param pszSource input multi-byte character string.
     307                 :  * @param pszSrcEncoding source encoding, typically CPL_ENC_UTF8.
     308                 :  * @param pszDstEncoding destination encoding, typically CPL_ENC_UCS2. 
     309                 :  *
     310                 :  * @return the zero terminated wchar_t string (to be freed with CPLFree()) or
     311                 :  * NULL on error.
     312                 :  */
     313                 : 
     314               0 : wchar_t *CPLRecodeToWCharIconv( const char *pszSource,
     315                 :                                 const char *pszSrcEncoding, 
     316                 :                                 const char *pszDstEncoding )
     317                 : 
     318                 : {
     319                 :     return (wchar_t *)CPLRecodeIconv( pszSource,
     320               0 :                                       pszSrcEncoding, pszDstEncoding);
     321                 : }
     322                 : 
     323                 : #endif /* CPL_RECODE_ICONV */

Generated by: LCOV version 1.7