LCOV - code coverage report
Current view: directory - port - cpl_recode_iconv.cpp (source / functions) Found Hit Coverage
Test: gdal_filtered.info Lines: 86 21 24.4 %
Date: 2012-12-26 Functions: 4 2 50.0 %

       1                 : /**********************************************************************
       2                 :  * $Id: cpl_recode_iconv.cpp 24555 2012-06-10 09:49:55Z rouault $
       3                 :  *
       4                 :  * Name:     cpl_recode_iconv.cpp
       5                 :  * Project:  CPL - Common Portability Library
       6                 :  * Purpose:  Character set recoding and char/wchar_t conversions implemented
       7                 :  *           using the iconv() functionality.
       8                 :  * Author:   Andrey Kiselev, dron@ak4719.spb.edu
       9                 :  *
      10                 :  **********************************************************************
      11                 :  * Copyright (c) 2011, Andrey Kiselev <dron@ak4719.spb.edu>
      12                 :  *
      13                 :  * Permission to use, copy, modify, and distribute this software for any
      14                 :  * purpose with or without fee is hereby granted, provided that the above
      15                 :  * copyright notice and this permission notice appear in all copies.
      16                 :  *
      17                 :  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
      18                 :  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
      19                 :  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
      20                 :  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
      21                 :  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
      22                 :  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
      23                 :  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
      24                 :  **********************************************************************/
      25                 : 
      26                 : #include "cpl_port.h"
      27                 : 
      28                 : CPL_CVSID("$Id: cpl_recode_iconv.cpp 24555 2012-06-10 09:49:55Z rouault $");
      29                 : 
      30                 : #ifdef CPL_RECODE_ICONV
      31                 : 
      32                 : #include <iconv.h>
      33                 : #include "cpl_string.h"
      34                 : 
      35                 : #ifndef ICONV_CPP_CONST
      36                 : #define ICONV_CPP_CONST ICONV_CONST
      37                 : #endif
      38                 : 
      39                 : #define CPL_RECODE_DSTBUF_SIZE 32768
      40                 : 
      41                 : /************************************************************************/
      42                 : /*                 CPLClearRecodeIconvWarningFlags()                    */
      43                 : /************************************************************************/
      44                 : 
      45                 : static int bHaveWarned1 = FALSE;
      46                 : static int bHaveWarned2 = FALSE;
      47                 : 
      48            6786 : void CPLClearRecodeIconvWarningFlags()
      49                 : {
      50            6786 :     bHaveWarned1 = FALSE;
      51            6786 :     bHaveWarned2 = FALSE;
      52            6786 : }
      53                 : 
      54                 : /************************************************************************/
      55                 : /*                          CPLRecodeIconv()                            */
      56                 : /************************************************************************/
      57                 : 
      58                 : /**
      59                 :  * Convert a string from a source encoding to a destination encoding
      60                 :  * using the iconv() function.
      61                 :  *
      62                 :  * If an error occurs an error may, or may not be posted with CPLError(). 
      63                 :  *
      64                 :  * @param pszSource a NULL terminated string.
      65                 :  * @param pszSrcEncoding the source encoding.
      66                 :  * @param pszDstEncoding the destination encoding.
      67                 :  *
      68                 :  * @return a NULL terminated string which should be freed with CPLFree().
      69                 :  */
      70                 : 
      71              35 : char *CPLRecodeIconv( const char *pszSource, 
      72                 :                       const char *pszSrcEncoding, 
      73                 :                       const char *pszDstEncoding )
      74                 : 
      75                 : {
      76                 :     iconv_t sConv;
      77                 : 
      78              35 :     sConv = iconv_open( pszDstEncoding, pszSrcEncoding );
      79                 : 
      80              35 :     if ( sConv == (iconv_t)-1 )
      81                 :     {
      82                 :         CPLError( CE_Warning, CPLE_AppDefined, 
      83                 :                   "Recode from %s to %s failed with the error: \"%s\".", 
      84               1 :                   pszSrcEncoding, pszDstEncoding, strerror(errno) );
      85                 : 
      86               1 :         return CPLStrdup(pszSource);
      87                 :     }
      88                 : 
      89                 : /* -------------------------------------------------------------------- */
      90                 : /*      XXX: There is a portability issue: iconv() function could be    */
      91                 : /*      declared differently on different platforms. The second         */
      92                 : /*      argument could be declared as char** (as POSIX defines) or      */
      93                 : /*      as a const char**. Handle it with the ICONV_CPP_CONST macro here.   */
      94                 : /* -------------------------------------------------------------------- */
      95              34 :     ICONV_CPP_CONST char *pszSrcBuf = (ICONV_CPP_CONST char *)pszSource;
      96              34 :     size_t  nSrcLen = strlen( pszSource );
      97              34 :     size_t  nDstCurLen = MAX(CPL_RECODE_DSTBUF_SIZE, nSrcLen + 1);
      98              34 :     size_t  nDstLen = nDstCurLen;
      99              34 :     char    *pszDestination = (char *)CPLCalloc( nDstCurLen, sizeof(char) );
     100              34 :     char    *pszDstBuf = pszDestination;
     101                 : 
     102             102 :     while ( nSrcLen > 0 )
     103                 :     {
     104                 :         size_t  nConverted =
     105              34 :             iconv( sConv, &pszSrcBuf, &nSrcLen, &pszDstBuf, &nDstLen );
     106                 : 
     107              34 :         if ( nConverted == (size_t)-1 )
     108                 :         {
     109               0 :             if ( errno == EILSEQ )
     110                 :             {
     111                 :                 // Skip the invalid sequence in the input string.
     112               0 :                 if (!bHaveWarned1)
     113                 :                 {
     114               0 :                     bHaveWarned1 = TRUE;
     115                 :                     CPLError(CE_Warning, CPLE_AppDefined,
     116                 :                             "One or several characters couldn't be converted correctly from %s to %s.\n"
     117                 :                             "This warning will not be emitted anymore",
     118               0 :                              pszSrcEncoding, pszDstEncoding);
     119                 :                 }
     120               0 :                 nSrcLen--, pszSrcBuf++;
     121               0 :                 continue;
     122                 :             }
     123                 : 
     124               0 :             else if ( errno == E2BIG )
     125                 :             {
     126                 :                 // We are running out of the output buffer.
     127                 :                 // Dynamically increase the buffer size.
     128               0 :                 size_t nTmp = nDstCurLen;
     129               0 :                 nDstCurLen *= 2;
     130                 :                 pszDestination =
     131               0 :                     (char *)CPLRealloc( pszDestination, nDstCurLen );
     132               0 :                 pszDstBuf = pszDestination + nTmp - nDstLen;
     133               0 :                 nDstLen += nDstCurLen - nTmp;
     134               0 :                 continue;
     135                 :             }
     136                 : 
     137                 :             else
     138               0 :                 break;
     139                 :         }
     140                 :     }
     141                 : 
     142              34 :     pszDestination[nDstCurLen - nDstLen] = '\0';
     143                 : 
     144              34 :     iconv_close( sConv );
     145                 : 
     146              34 :     return pszDestination;
     147                 : }
     148                 : 
     149                 : /************************************************************************/
     150                 : /*                      CPLRecodeFromWCharIconv()                       */
     151                 : /************************************************************************/
     152                 : 
     153                 : /**
     154                 :  * Convert wchar_t string to UTF-8. 
     155                 :  *
     156                 :  * Convert a wchar_t string into a multibyte utf-8 string
     157                 :  * using the iconv() function.
     158                 :  *
     159                 :  * Note that the wchar_t type varies in size on different systems. On
     160                 :  * win32 it is normally 2 bytes, and on unix 4 bytes.
     161                 :  *
     162                 :  * If an error occurs an error may, or may not be posted with CPLError(). 
     163                 :  *
     164                 :  * @param pwszSource the source wchar_t string, terminated with a 0 wchar_t.
     165                 :  * @param pszSrcEncoding the source encoding, typically CPL_ENC_UCS2.
     166                 :  * @param pszDstEncoding the destination encoding, typically CPL_ENC_UTF8.
     167                 :  *
     168                 :  * @return a zero terminated multi-byte string which should be freed with 
     169                 :  * CPLFree(), or NULL if an error occurs. 
     170                 :  */
     171                 : 
     172               0 : char *CPLRecodeFromWCharIconv( const wchar_t *pwszSource, 
     173                 :                                const char *pszSrcEncoding, 
     174                 :                                const char *pszDstEncoding )
     175                 : 
     176                 : {
     177                 : /* -------------------------------------------------------------------- */
     178                 : /*      What is the source length.                                      */
     179                 : /* -------------------------------------------------------------------- */
     180               0 :     size_t  nSrcLen = 0;
     181                 : 
     182               0 :     while ( pwszSource[nSrcLen] != 0 )
     183               0 :         nSrcLen++;
     184                 : 
     185                 : /* -------------------------------------------------------------------- */
     186                 : /*      iconv() does not support wchar_t so we need to repack the       */
     187                 : /*      characters according to the width of a character in the         */
     188                 : /*      source encoding.  For instance if wchar_t is 4 bytes but our    */
     189                 : /*      source is UTF16 then we need to pack down into 2 byte           */
     190                 : /*      characters before passing to iconv().                           */
     191                 : /* -------------------------------------------------------------------- */
     192               0 :     int nTargetCharWidth = CPLEncodingCharSize( pszSrcEncoding );
     193                 : 
     194               0 :     if( nTargetCharWidth < 1 )
     195                 :     {
     196                 :         CPLError( CE_Warning, CPLE_AppDefined,
     197                 :                   "Recode from %s with CPLRecodeFromWChar() failed because"
     198                 :                   " the width of characters in the encoding are not known.",
     199               0 :                   pszSrcEncoding );
     200               0 :         return CPLStrdup("");
     201                 :     }
     202                 : 
     203               0 :     GByte *pszIconvSrcBuf = (GByte*) CPLCalloc((nSrcLen+1),nTargetCharWidth);
     204                 :     unsigned int iSrc;
     205                 : 
     206               0 :     for( iSrc = 0; iSrc <= nSrcLen; iSrc++ )
     207                 :     {
     208               0 :         if( nTargetCharWidth == 1 )
     209               0 :             pszIconvSrcBuf[iSrc] = (GByte) pwszSource[iSrc];
     210               0 :         else if( nTargetCharWidth == 2 )
     211               0 :             ((short *)pszIconvSrcBuf)[iSrc] = (short) pwszSource[iSrc];
     212               0 :         else if( nTargetCharWidth == 4 )
     213               0 :             ((GInt32 *)pszIconvSrcBuf)[iSrc] = pwszSource[iSrc];
     214                 :     }
     215                 : 
     216                 : /* -------------------------------------------------------------------- */
     217                 : /*      Create the iconv() translation object.                          */
     218                 : /* -------------------------------------------------------------------- */
     219                 :     iconv_t sConv;
     220                 : 
     221               0 :     sConv = iconv_open( pszDstEncoding, pszSrcEncoding );
     222                 : 
     223               0 :     if ( sConv == (iconv_t)-1 )
     224                 :     {
     225               0 :         CPLFree( pszIconvSrcBuf );
     226                 :         CPLError( CE_Warning, CPLE_AppDefined, 
     227                 :                   "Recode from %s to %s failed with the error: \"%s\".", 
     228               0 :                   pszSrcEncoding, pszDstEncoding, strerror(errno) );
     229                 : 
     230               0 :         return CPLStrdup( "" );
     231                 :     }
     232                 : 
     233                 : /* -------------------------------------------------------------------- */
     234                 : /*      XXX: There is a portability issue: iconv() function could be    */
     235                 : /*      declared differently on different platforms. The second         */
     236                 : /*      argument could be declared as char** (as POSIX defines) or      */
     237                 : /*      as a const char**. Handle it with the ICONV_CPP_CONST macro here.   */
     238                 : /* -------------------------------------------------------------------- */
     239               0 :     ICONV_CPP_CONST char *pszSrcBuf = (ICONV_CPP_CONST char *) pszIconvSrcBuf;
     240                 : 
     241                 :     /* iconv expects a number of bytes, not characters */
     242               0 :     nSrcLen *= sizeof(wchar_t);
     243                 : 
     244                 : /* -------------------------------------------------------------------- */
     245                 : /*      Allocate destination buffer.                                    */
     246                 : /* -------------------------------------------------------------------- */
     247               0 :     size_t  nDstCurLen = MAX(CPL_RECODE_DSTBUF_SIZE, nSrcLen + 1);
     248               0 :     size_t  nDstLen = nDstCurLen;
     249               0 :     char    *pszDestination = (char *)CPLCalloc( nDstCurLen, sizeof(char) );
     250               0 :     char    *pszDstBuf = pszDestination;
     251                 : 
     252               0 :     while ( nSrcLen > 0 )
     253                 :     {
     254                 :         size_t  nConverted =
     255               0 :             iconv( sConv, &pszSrcBuf, &nSrcLen, &pszDstBuf, &nDstLen );
     256                 : 
     257               0 :         if ( nConverted == (size_t)-1 )
     258                 :         {
     259               0 :             if ( errno == EILSEQ )
     260                 :             {
     261                 :                 // Skip the invalid sequence in the input string.
     262               0 :                 nSrcLen--;
     263               0 :                 pszSrcBuf += sizeof(wchar_t);
     264               0 :                 if (!bHaveWarned2)
     265                 :                 {
     266               0 :                     bHaveWarned2 = TRUE;
     267                 :                     CPLError(CE_Warning, CPLE_AppDefined,
     268                 :                             "One or several characters couldn't be converted correctly from %s to %s.\n"
     269                 :                             "This warning will not be emitted anymore",
     270               0 :                              pszSrcEncoding, pszDstEncoding);
     271                 :                 }
     272               0 :                 continue;
     273                 :             }
     274                 : 
     275               0 :             else if ( errno == E2BIG )
     276                 :             {
     277                 :                 // We are running out of the output buffer.
     278                 :                 // Dynamically increase the buffer size.
     279               0 :                 size_t nTmp = nDstCurLen;
     280               0 :                 nDstCurLen *= 2;
     281                 :                 pszDestination =
     282               0 :                     (char *)CPLRealloc( pszDestination, nDstCurLen );
     283               0 :                 pszDstBuf = pszDestination + nTmp - nDstLen;
     284               0 :                 nDstLen += nDstCurLen - nTmp;
     285               0 :                 continue;
     286                 :             }
     287                 : 
     288                 :             else
     289               0 :                 break;
     290                 :         }
     291                 :     }
     292                 : 
     293               0 :     pszDestination[nDstCurLen - nDstLen] = '\0';
     294                 : 
     295               0 :     iconv_close( sConv );
     296                 : 
     297               0 :     CPLFree( pszIconvSrcBuf );
     298                 : 
     299               0 :     return pszDestination;
     300                 : }
     301                 : 
     302                 : /************************************************************************/
     303                 : /*                        CPLRecodeToWCharIconv()                       */
     304                 : /************************************************************************/
     305                 : 
     306                 : /**
     307                 :  * Convert UTF-8 string to a wchar_t string.
     308                 :  *
     309                 :  * Convert a 8bit, multi-byte per character input string into a wide
     310                 :  * character (wchar_t) string using the iconv() function.
     311                 :  *
     312                 :  * Note that the wchar_t type varies in size on different systems. On
     313                 :  * win32 it is normally 2 bytes, and on unix 4 bytes.
     314                 :  *
     315                 :  * If an error occurs an error may, or may not be posted with CPLError(). 
     316                 :  *
     317                 :  * @param pszSource input multi-byte character string.
     318                 :  * @param pszSrcEncoding source encoding, typically CPL_ENC_UTF8.
     319                 :  * @param pszDstEncoding destination encoding, typically CPL_ENC_UCS2. 
     320                 :  *
     321                 :  * @return the zero terminated wchar_t string (to be freed with CPLFree()) or
     322                 :  * NULL on error.
     323                 :  */
     324                 : 
     325               0 : wchar_t *CPLRecodeToWCharIconv( const char *pszSource,
     326                 :                                 const char *pszSrcEncoding, 
     327                 :                                 const char *pszDstEncoding )
     328                 : 
     329                 : {
     330                 :     return (wchar_t *)CPLRecodeIconv( pszSource,
     331               0 :                                       pszSrcEncoding, pszDstEncoding);
     332                 : }
     333                 : 
     334                 : #endif /* CPL_RECODE_ICONV */

Generated by: LCOV version 1.7