LCOV - code coverage report
Current view: directory - port - cpl_recode_iconv.cpp (source / functions) Found Hit Coverage
Test: gdal_filtered.info Lines: 82 15 18.3 %
Date: 2011-12-18 Functions: 3 1 33.3 %

       1                 : /**********************************************************************
       2                 :  * $Id: cpl_recode_iconv.cpp 22600 2011-06-28 13:36:36Z warmerdam $
       3                 :  *
       4                 :  * Name:     cpl_recode_iconv.cpp
       5                 :  * Project:  CPL - Common Portability Library
       6                 :  * Purpose:  Character set recoding and char/wchar_t conversions implemented
       7                 :  *           using the iconv() functionality.
       8                 :  * Author:   Andrey Kiselev, dron@ak4719.spb.edu
       9                 :  *
      10                 :  **********************************************************************
      11                 :  * Copyright (c) 2011, Andrey Kiselev <dron@ak4719.spb.edu>
      12                 :  *
      13                 :  * Permission to use, copy, modify, and distribute this software for any
      14                 :  * purpose with or without fee is hereby granted, provided that the above
      15                 :  * copyright notice and this permission notice appear in all copies.
      16                 :  *
      17                 :  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
      18                 :  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
      19                 :  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
      20                 :  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
      21                 :  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
      22                 :  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
      23                 :  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
      24                 :  **********************************************************************/
      25                 : 
      26                 : #include "cpl_port.h"
      27                 : 
      28                 : CPL_CVSID("$Id: cpl_recode_iconv.cpp 22600 2011-06-28 13:36:36Z warmerdam $");
      29                 : 
      30                 : #ifdef CPL_RECODE_ICONV
      31                 : 
      32                 : #include <iconv.h>
      33                 : #include "cpl_string.h"
      34                 : 
      35                 : #define CPL_RECODE_DSTBUF_SIZE 32768
      36                 : 
      37                 : /************************************************************************/
      38                 : /*                          CPLRecodeIconv()                            */
      39                 : /************************************************************************/
      40                 : 
      41                 : /**
      42                 :  * Convert a string from a source encoding to a destination encoding
      43                 :  * using the iconv() function.
      44                 :  *
      45                 :  * If iconv_open() fails, a warning is posted with CPLError() and the result of CPLStrdup(pszSource) is returned.
      46                 :  * Input that iconv() rejects with EILSEQ is skipped one byte at a time; a warning is posted for the first such case only.
      47                 :  * @param pszSource a NUL terminated string.
      48                 :  * @param pszSrcEncoding the source encoding.
      49                 :  * @param pszDstEncoding the destination encoding.
      50                 :  *
      51                 :  * @return a NUL terminated string which should be freed with CPLFree().
      52                 :  */
      53                 : 
       54               2 : char *CPLRecodeIconv( const char *pszSource, 
      55                 :                       const char *pszSrcEncoding, 
      56                 :                       const char *pszDstEncoding )
      57                 : 
      58                 : {
      59                 :     iconv_t sConv;
      60                 : 
       61               2 :     sConv = iconv_open( pszDstEncoding, pszSrcEncoding );
      62                 : 
       63               2 :     if ( sConv == (iconv_t)-1 )
      64                 :     {
      65                 :         CPLError( CE_Warning, CPLE_AppDefined, 
      66                 :                   "Recode from %s to %s failed with the error: \"%s\".", 
       67               0 :                   pszSrcEncoding, pszDstEncoding, strerror(errno) );
      68                 : 
       69               0 :         return CPLStrdup(pszSource);
      70                 :     }
      71                 : 
      72                 : /* -------------------------------------------------------------------- */
      73                 : /*      XXX: There is a portability issue: the iconv() function may be  */
      74                 : /*      declared differently on different platforms. The second         */
      75                 : /*      argument may be declared as char** (as POSIX defines) or        */
      76                 : /*      as a const char**. Handle it with the ICONV_CONST macro here.   */
      77                 : /* -------------------------------------------------------------------- */
       78               2 :     ICONV_CONST char *pszSrcBuf = (ICONV_CONST char *)pszSource;
       79               2 :     size_t  nSrcLen = strlen( pszSource );
       80               2 :     size_t  nDstCurLen = MAX(CPL_RECODE_DSTBUF_SIZE, nSrcLen + 1);
       81               2 :     size_t  nDstLen = nDstCurLen;
       82               2 :     char    *pszDestination = (char *)CPLCalloc( nDstCurLen, sizeof(char) );
       83               2 :     char    *pszDstBuf = pszDestination;
      84                 : 
       85               6 :     while ( nSrcLen > 0 )
      86                 :     {
      87                 :         size_t  nConverted =
       88               2 :             iconv( sConv, &pszSrcBuf, &nSrcLen, &pszDstBuf, &nDstLen );
      89                 : 
       90               2 :         if ( nConverted == (size_t)-1 )
      91                 :         {
       92               0 :             if ( errno == EILSEQ )
      93                 :             {
      94                 :                 // Skip one byte of the invalid input sequence; the warning below is posted only once because bHasWarned is static.
      95                 :                 static int bHasWarned = FALSE;
       96               0 :                 if (!bHasWarned)
      97                 :                 {
       98               0 :                     bHasWarned = TRUE;
      99                 :                     CPLError(CE_Warning, CPLE_AppDefined,
     100                 :                             "One or several characters couldn't be converted correctly from %s to %s.\n"
     101                 :                             "This warning will not be emitted anymore",
      102               0 :                              pszSrcEncoding, pszDstEncoding);
     103                 :                 }
      104               0 :                 nSrcLen--, pszSrcBuf++;
      105               0 :                 continue;
     106                 :             }
     107                 : 
      108               0 :             else if ( errno == E2BIG )
     109                 :             {
     110                 :                 // We are running out of the output buffer. Double its size and
     111                 :                 // re-point pszDstBuf at the same write offset in the new block.
      112               0 :                 size_t nTmp = nDstCurLen;
      113               0 :                 nDstCurLen *= 2;
     114                 :                 pszDestination =
      115               0 :                     (char *)CPLRealloc( pszDestination, nDstCurLen );
      116               0 :                 pszDstBuf = pszDestination + nTmp - nDstLen;
      117               0 :                 nDstLen += nDstCurLen - nTmp;
      118               0 :                 continue;
     119                 :             }
     120                 :             // Any other errno: give up and keep what was converted so far.
     121                 :             else
      122               0 :                 break;
     123                 :         }
     124                 :     }
     125                 :     // NOTE(review): if iconv() leaves nDstLen == 0, the index below equals nDstCurLen, one past the allocation -- confirm.
      126               2 :     pszDestination[nDstCurLen - nDstLen] = '\0';
     127                 : 
      128               2 :     iconv_close( sConv );
     129                 : 
      130               2 :     return pszDestination;
     131                 : }
     132                 : 
     133                 : /************************************************************************/
     134                 : /*                      CPLRecodeFromWCharIconv()                       */
     135                 : /************************************************************************/
     136                 : 
     137                 : /**
     138                 :  * Convert a wchar_t string to a multi-byte string.
     139                 :  *
     140                 :  * Repack a wchar_t string to the character width of pszSrcEncoding and
     141                 :  * convert it to pszDstEncoding using the iconv() function.
     142                 :  *
     143                 :  * Note that the wchar_t type varies in size on different systems. On
     144                 :  * win32 it is normally 2 bytes, and on unix 4 bytes.
     145                 :  *
     146                 :  * If the character width of pszSrcEncoding is unknown or iconv_open() fails, a warning is posted with CPLError().
     147                 :  *
     148                 :  * @param pwszSource the source wchar_t string, terminated with a 0 wchar_t.
     149                 :  * @param pszSrcEncoding the source encoding, typically CPL_ENC_UCS2.
     150                 :  * @param pszDstEncoding the destination encoding, typically CPL_ENC_UTF8.
     151                 :  *
     152                 :  * @return a zero terminated multi-byte string which should be freed with
     153                 :  * CPLFree(); the two warning paths above return CPLStrdup("").
     154                 :  */
     155                 : 
      156               0 : char *CPLRecodeFromWCharIconv( const wchar_t *pwszSource, 
     157                 :                                const char *pszSrcEncoding, 
     158                 :                                const char *pszDstEncoding )
     159                 : 
     160                 : {
     161                 : /* -------------------------------------------------------------------- */
     162                 : /*      Count the wchar_t characters before the terminating zero.       */
     163                 : /* -------------------------------------------------------------------- */
      164               0 :     size_t  nSrcLen = 0;
     165                 : 
      166               0 :     while ( pwszSource[nSrcLen] != 0 )
      167               0 :         nSrcLen++;
     168                 : 
     169                 : /* -------------------------------------------------------------------- */
     170                 : /*      iconv() does not support wchar_t so we need to repack the       */
     171                 : /*      characters according to the width of a character in the         */
     172                 : /*      source encoding.  For instance if wchar_t is 4 bytes but our    */
     173                 : /*      source is UTF16 then we need to pack down into 2 byte           */
     174                 : /*      characters before passing to iconv().                           */
     175                 : /* -------------------------------------------------------------------- */
      176               0 :     int nTargetCharWidth = CPLEncodingCharSize( pszSrcEncoding );
     177                 : 
      178               0 :     if( nTargetCharWidth < 1 )
     179                 :     {
     180                 :         CPLError( CE_Warning, CPLE_AppDefined,
     181                 :                   "Recode from %s with CPLRecodeFromWChar() failed because"
     182                 :                   " the width of characters in the encoding are not known.",
      183               0 :                   pszSrcEncoding );
      184               0 :         return CPLStrdup("");
     185                 :     }
     186                 : 
      187               0 :     GByte *pszIconvSrcBuf = (GByte*) CPLCalloc((nSrcLen+1),nTargetCharWidth);
     188                 :     unsigned int iSrc;
     189                 : 
      190               0 :     for( iSrc = 0; iSrc <= nSrcLen; iSrc++ )
     191                 :     {
      192               0 :         if( nTargetCharWidth == 1 )
      193               0 :             pszIconvSrcBuf[iSrc] = (GByte) pwszSource[iSrc];
      194               0 :         else if( nTargetCharWidth == 2 )
      195               0 :             ((short *)pszIconvSrcBuf)[iSrc] = (short) pwszSource[iSrc];
      196               0 :         else if( nTargetCharWidth == 4 )
      197               0 :             ((GInt32 *)pszIconvSrcBuf)[iSrc] = pwszSource[iSrc];
     198                 :     }
     199                 : 
     200                 : /* -------------------------------------------------------------------- */
     201                 : /*      Create the iconv() translation object.                          */
     202                 : /* -------------------------------------------------------------------- */
     203                 :     iconv_t sConv;
     204                 : 
      205               0 :     sConv = iconv_open( pszDstEncoding, pszSrcEncoding );
     206                 : 
      207               0 :     if ( sConv == (iconv_t)-1 )
     208                 :     {
      209               0 :         CPLFree( pszIconvSrcBuf );
     210                 :         CPLError( CE_Warning, CPLE_AppDefined, 
     211                 :                   "Recode from %s to %s failed with the error: \"%s\".", 
      212               0 :                   pszSrcEncoding, pszDstEncoding, strerror(errno) );
     213                 : 
      214               0 :         return CPLStrdup( "" );
     215                 :     }
     216                 : 
     217                 : /* -------------------------------------------------------------------- */
     218                 : /*      XXX: There is a portability issue: the iconv() function may be  */
     219                 : /*      declared differently on different platforms. The second         */
     220                 : /*      argument may be declared as char** (as POSIX defines) or        */
     221                 : /*      as a const char**. Handle it with the ICONV_CONST macro here.   */
     222                 : /* -------------------------------------------------------------------- */
      223               0 :     ICONV_CONST char *pszSrcBuf = (ICONV_CONST char *) pszIconvSrcBuf;
     224                 : 
     225                 :     /* iconv expects a number of bytes, not characters. NOTE(review): the buffer above holds nTargetCharWidth bytes per character, so sizeof(wchar_t) overstates the byte count whenever the two differ -- confirm. */
      226               0 :     nSrcLen *= sizeof(wchar_t);
     227                 : 
     228                 : /* -------------------------------------------------------------------- */
     229                 : /*      Allocate destination buffer.                                    */
     230                 : /* -------------------------------------------------------------------- */
      231               0 :     size_t  nDstCurLen = MAX(CPL_RECODE_DSTBUF_SIZE, nSrcLen + 1);
      232               0 :     size_t  nDstLen = nDstCurLen;
      233               0 :     char    *pszDestination = (char *)CPLCalloc( nDstCurLen, sizeof(char) );
      234               0 :     char    *pszDstBuf = pszDestination;
     235                 : 
      236               0 :     while ( nSrcLen > 0 )
     237                 :     {
     238                 :         size_t  nConverted =
      239               0 :             iconv( sConv, &pszSrcBuf, &nSrcLen, &pszDstBuf, &nDstLen );
     240                 : 
      241               0 :         if ( nConverted == (size_t)-1 )
     242                 :         {
      243               0 :             if ( errno == EILSEQ )
     244                 :             {
     245                 :                 // Skip the invalid sequence in the input string. NOTE(review): nSrcLen is a byte count here but drops by 1 while pszSrcBuf advances sizeof(wchar_t) bytes -- confirm.
      246               0 :                 nSrcLen--;
      247               0 :                 pszSrcBuf += sizeof(wchar_t);
     248                 :                 static int bHasWarned = FALSE;
      249               0 :                 if (!bHasWarned)
     250                 :                 {
      251               0 :                     bHasWarned = TRUE;
     252                 :                     CPLError(CE_Warning, CPLE_AppDefined,
     253                 :                             "One or several characters couldn't be converted correctly from %s to %s.\n"
     254                 :                             "This warning will not be emitted anymore",
      255               0 :                              pszSrcEncoding, pszDstEncoding);
     256                 :                 }
      257               0 :                 continue;
     258                 :             }
     259                 : 
      260               0 :             else if ( errno == E2BIG )
     261                 :             {
     262                 :                 // We are running out of the output buffer. Double its size and
     263                 :                 // re-point pszDstBuf at the same write offset in the new block.
      264               0 :                 size_t nTmp = nDstCurLen;
      265               0 :                 nDstCurLen *= 2;
     266                 :                 pszDestination =
      267               0 :                     (char *)CPLRealloc( pszDestination, nDstCurLen );
      268               0 :                 pszDstBuf = pszDestination + nTmp - nDstLen;
      269               0 :                 nDstLen += nDstCurLen - nTmp;
      270               0 :                 continue;
     271                 :             }
     272                 :             // Any other errno: give up and keep what was converted so far.
     273                 :             else
      274               0 :                 break;
     275                 :         }
     276                 :     }
     277                 :     // NOTE(review): if iconv() leaves nDstLen == 0, the index below equals nDstCurLen, one past the allocation -- confirm.
      278               0 :     pszDestination[nDstCurLen - nDstLen] = '\0';
     279                 : 
      280               0 :     iconv_close( sConv );
     281                 : 
      282               0 :     CPLFree( pszIconvSrcBuf );
     283                 : 
      284               0 :     return pszDestination;
     285                 : }
     286                 : 
     287                 : /************************************************************************/
     288                 : /*                        CPLRecodeToWCharIconv()                       */
     289                 : /************************************************************************/
     290                 : 
     291                 : /**
     292                 :  * Convert a multi-byte string to a wchar_t string.
     293                 :  *
     294                 :  * Convert an 8-bit, multi-byte per character input string into a wide
     295                 :  * character (wchar_t) string using the iconv() function.
     296                 :  *
     297                 :  * Note that the wchar_t type varies in size on different systems. On
     298                 :  * win32 it is normally 2 bytes, and on unix 4 bytes.
     299                 :  *
     300                 :  * This is a thin wrapper: it returns the result of CPLRecodeIconv() cast to wchar_t *.
     301                 :  * NOTE(review): CPLRecodeIconv() appends a single '\0' byte, not a full wchar_t zero -- confirm the result is properly terminated.
     302                 :  * @param pszSource input multi-byte character string.
     303                 :  * @param pszSrcEncoding source encoding, typically CPL_ENC_UTF8.
     304                 :  * @param pszDstEncoding destination encoding, typically CPL_ENC_UCS2. 
     305                 :  *
     306                 :  * @return the wchar_t string (to be freed with CPLFree()). If iconv_open() fails,
     307                 :  * CPLRecodeIconv() returns CPLStrdup(pszSource) and that pointer is passed through.
     308                 :  */
     309                 : 
      310               0 : wchar_t *CPLRecodeToWCharIconv( const char *pszSource,
     311                 :                                 const char *pszSrcEncoding, 
     312                 :                                 const char *pszDstEncoding )
     313                 : 
     314                 : {
     315                 :     return (wchar_t *)CPLRecodeIconv( pszSource,
      316               0 :                                       pszSrcEncoding, pszDstEncoding);
     317                 : }
     318                 : 
     319                 : #endif /* CPL_RECODE_ICONV */

Generated by: LCOV version 1.7