// Logo Search packages:
// Sourcecode: sablotron version File versions
//
// encoding.cpp

/* 
 * The contents of this file are subject to the Mozilla Public
 * License Version 1.1 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.mozilla.org/MPL/
 * 
 * Software distributed under the License is distributed on an "AS
 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * rights and limitations under the License.
 * 
 * The Original Code is the Sablotron XSLT Processor.
 * 
 * The Initial Developer of the Original Code is Ginger Alliance Ltd.
 * Portions created by Ginger Alliance are Copyright (C) 2000-2002
 * Ginger Alliance Ltd. All Rights Reserved.
 * 
 * Contributor(s):
 * 
 * Alternatively, the contents of this file may be used under the
 * terms of the GNU General Public License Version 2 or later (the
 * "GPL"), in which case the provisions of the GPL are applicable 
 * instead of those above.  If you wish to allow use of your 
 * version of this file only under the terms of the GPL and not to
 * allow others to use your version of this file under the MPL,
 * indicate your decision by deleting the provisions above and
 * replace them with the notice and other provisions required by
 * the GPL.  If you do not delete the provisions above, a recipient
 * may use your version of this file under either the MPL or the
 * GPL.
 */
#include <errno.h>

#include "encoding.h"
#include "proc.h"
#include "guard.h"
#include "utf8.h"
// #include "shandler.h"

/*
 ******************************
 * internal recoding functions
 * (shall be replaced with a call to the sabconv library function)
 ******************************
 */

// encoding tables for the functions

// windows-1250 (CP1250) -> Unicode code point table for the upper half of
// the byte range. Entry i holds the code point for input byte (0x80 + i);
// -1 marks a byte that has no mapping (encInternalConv reports such a byte
// as ENC_EILSEQ). Bytes below 0x80 are passed through unchanged by
// encInternalConv and therefore have no entries here.
// Byte 0xAC maps to U+00AC (NOT SIGN); it used to be marked as unmapped.
short EncTable1250[] =
{
    0x20ac,     -1, 0x201a,     -1, 0x201e, 0x2026, 0x2020, 0x2021, // 0x80-0x87
        -1, 0x2030, 0x0160, 0x2039, 0x015a, 0x0164, 0x017d, 0x0179, // 0x88-0x8F
        -1, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, // 0x90-0x97
        -1, 0x2122, 0x0161, 0x203a, 0x015b, 0x0165, 0x017e, 0x017a, // 0x98-0x9F
    0x00a0, 0x02c7, 0x02d8, 0x0141, 0x00a4, 0x0104, 0x00a6, 0x00a7, // 0xA0-0xA7
    0x00a8, 0x00a9, 0x015e, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x017b, // 0xA8-0xAF
    0x00b0, 0x00b1, 0x02db, 0x0142, 0x00b4, 0x00b5, 0x00b6, 0x00b7, // 0xB0-0xB7
    0x00b8, 0x0105, 0x015f, 0x00bb, 0x013d, 0x02dd, 0x013e, 0x017c, // 0xB8-0xBF
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7, // 0xC0-0xC7
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e, // 0xC8-0xCF
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7, // 0xD0-0xD7
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df, // 0xD8-0xDF
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7, // 0xE0-0xE7
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f, // 0xE8-0xEF
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7, // 0xF0-0xF7
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9  // 0xF8-0xFF
};

// ISO-8859-2 (Latin-2) -> Unicode code point table for the upper half of
// the byte range. Entry i holds the code point for input byte (0x80 + i);
// -1 marks a byte that has no mapping in this table (encInternalConv
// reports such a byte as ENC_EILSEQ). All of 0x80-0x9F is unmapped here.
short EncTableLatin2[] =
{
        -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1, // 0x80-0x87
        -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1, // 0x88-0x8F
        -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1, // 0x90-0x97
        -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1, // 0x98-0x9F
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7, // 0xA0-0xA7
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b, // 0xA8-0xAF
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7, // 0xB0-0xB7
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c, // 0xB8-0xBF
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7, // 0xC0-0xC7
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e, // 0xC8-0xCF
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7, // 0xD0-0xD7
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df, // 0xD8-0xDF
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7, // 0xE0-0xE7
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f, // 0xE8-0xEF
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7, // 0xF0-0xF7
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9, // 0xF8-0xFF
};

// Looks up a built-in conversion table for the encoding named by 'enc'.
// Only conversions *to* UTF-8 are available internally. Returns a pointer
// to the matching table, or (void*)-1 when the direction or the encoding
// is not supported (the name comparison ignores case).
void* encInternalOpen(const Str& enc, Bool toUTF8)
{
    void* descriptor = (void*)-1;   // "not available" unless a table matches
    if (toUTF8)
    {
        if (enc.eqNoCase("ISO-8859-2"))
            descriptor = EncTableLatin2;
        else if (enc.eqNoCase("windows-1250"))
            descriptor = EncTable1250;
        // more builtin conversions can come here
    }
    return descriptor;
}

// Releases a descriptor obtained from encInternalOpen. The descriptor is
// not touched here, so there is nothing to free; the function always
// reports success using iconv's value for OK (0).
int encInternalClose(void* intCD)
{
    (void) intCD;               // intentionally unused
    const int iconvOk = 0;
    return iconvOk;
}

// the following only does 1-byte encodings for which an EncTable is defined

// Converts single-byte encoded input to UTF-8 using one of the built-in
// tables, with iconv-like buffer bookkeeping.
//
//   intCD         table returned by encInternalOpen (must not be NULL or
//                 (void*)-1); indexed by (byte - 0x80)
//   inbuf         in/out: pointer to the next unread input byte
//   inbytesleft   in/out: number of unread input bytes
//   outbuf        in/out: pointer to the next free output byte
//   outbytesleft  in/out: number of free output bytes
//
// Returns ENC_OK when all input was consumed, ENC_EILSEQ when an input
// byte has no mapping in the table (-1 entry), and ENC_E2BIG when the
// output buffer cannot hold the next character. On an error return the
// pointers and counters describe the state just before the offending
// byte, so the caller can resume from there.
EncResult encInternalConv(void *intCD, const char** inbuf, 
                                  size_t *inbytesleft,
                                  char **outbuf, size_t *outbytesleft)
{
    assert(intCD && intCD != (void*)-1);
    const short *table = (const short*)intCD;
    char charbuf[6];    // scratch space for one UTF-8 encoded character
    while (*inbytesleft > 0)
    {
        const unsigned char thischar = (unsigned char) **inbuf;
        size_t len;
        if (thischar < 0x80)
        {
            // 7-bit bytes are copied through unchanged; make sure there is
            // room first (this check was missing, so a full output buffer
            // was written past its end)
            if (*outbytesleft < 1)
                return ENC_E2BIG;
            **outbuf = (char) thischar;
            len = 1;
        }
        else
        {
            const int val = table[thischar - 0x80];
            if (val == -1)
                return ENC_EILSEQ;
            // the return value is used as the number of bytes placed in
            // charbuf
            len = utf8FromCharCode(charbuf, val);
            if (len > *outbytesleft)
                return ENC_E2BIG;
            memcpy(*outbuf, charbuf, len);
        }
        *outbuf += len;
        *outbytesleft -= len;
        (*inbuf)++;
        (*inbytesleft)--;
    }
    return ENC_OK;
}

/*
 *
 *      Recoder class
 *
 */


// Constructs a Recoder; no explicit initialization is performed here.
Recoder::Recoder()
{
}

// Tells whether 'enc' names one of the encodings listed below (UTF-8,
// UTF-16, ISO-8859-1, US-ASCII). The comparison ignores case.
Bool Recoder::handledByExpat(const Str& enc) const
{
    bool native = enc.eqNoCase("UTF-8");
    native = native || enc.eqNoCase("UTF-16");
    native = native || enc.eqNoCase("ISO-8859-1");
    native = native || enc.eqNoCase("US-ASCII");
    return native;
}

// Closes every stored conversion descriptor that has a non-NULL physCD,
// then releases the descriptor list itself.
void Recoder::clear(Sit S)
{
    int idx = 0;
    while (idx < items.number())
    {
        CDesc entry = items[idx];
        if (entry && entry -> physCD)
            close(S, entry);
        idx++;
    }
    items.freeall(FALSE);
}

// Destructor. Does not close the stored conversion descriptors: clear()
// needs a situation object, which is not available here.
Recoder::~Recoder()
{
    // can't clear here (have no situation)
}

// Opens a conversion between 'enc' and UTF-8 (to UTF-8 when toUTF8 is set,
// from UTF-8 otherwise). Back ends are tried in order until one succeeds:
//   1. iconv (only when built with HAVE_ICONV_H),
//   2. the built-in tables (encInternalOpen),
//   3. the encoding handler obtained from the processor, if any.
// On success the new descriptor is stored in 'cd' and appended to 'items';
// otherwise E1_UNKNOWN_ENC is raised through Err1.
eFlag Recoder::open(Sit S, const Str& enc, Bool toUTF8, CDesc& cd)
{
    GP(ConvInfo) newitem = new ConvInfo;
    (*newitem).method = ENC_NONE;
    (*newitem).physCD = NULL;

#ifdef HAVE_ICONV_H
    // stage 1: iconv
    iconv_t icd = toUTF8 ? iconv_open("UTF-8", enc) : iconv_open(enc, "UTF-8");
    // switch off transliteration in iconv:
    // sadly non-standard, only works in windows port
    // int val = 0;
    // iconvctl(icd, ICONV_SET_TRANSLITERATE, &val);
    if (icd != (iconv_t) -1)
    {
        (*newitem).method = ENC_ICONV;
        (*newitem).physCD = (void *) icd;
    }
#endif

    // stage 2: internal recode tables
    if ((*newitem).method == ENC_NONE)
    {
        void *tableCD = encInternalOpen(enc, toUTF8);
        if (tableCD != (void*)-1)
        {
            (*newitem).method = ENC_INTERNAL;
            (*newitem).physCD = tableCD;
        }
    }

    // stage 3: the encoding handler as a last resort
    if ((*newitem).method == ENC_NONE)
    {
        void* handlerData = NULL;
        EncHandler *handler = NULL;
        if (S.getProcessor())
            handler = S.getProcessor() -> getEncHandler(&handlerData);
        if (handler)
        {
            void *handlerCD = handler -> open(handlerData, S.getProcessor(), 
                toUTF8 ? EH_TO_UTF8 : EH_FROM_UTF8, enc);
            if (handlerCD != (void*) -1)
            {
                (*newitem).method = ENC_HANDLER;
                (*newitem).physCD = handlerCD;
            }
        }
    }

    if ((*newitem).method != ENC_NONE)
    {
        items.append(cd = newitem.keep());
    }
    else
    {
        Err1(S, E1_UNKNOWN_ENC, enc);
    }
    return OK;
}

// Opens a conversion from UTF-8 to the encoding 'enc'; see open().
eFlag Recoder::openFromUTF8(Sit S, const Str& enc, CDesc& cd)
{
    const Bool toUTF8 = FALSE;
    return open(S, enc, toUTF8, cd);
}

// Opens a conversion from the encoding 'enc' to UTF-8; see open().
eFlag Recoder::openToUTF8(Sit S, const Str& enc, CDesc& cd)
{
    const Bool toUTF8 = TRUE;
    return open(S, enc, toUTF8, cd);
}

// Closes the conversion descriptor 'cd' using the back end recorded in
// cd->method. For handler-based descriptors the handler is fetched from
// the processor again; if none is available nothing is closed.
// Always returns OK.
eFlag Recoder::close(Sit S, CDesc cd)
{
    assert(cd);
    switch(cd -> method)
    {
    case ENC_INTERNAL:
        encInternalClose(cd -> physCD);
        break;
    case ENC_HANDLER:
        {
            void *handlerData = NULL;
            EncHandler *handler = NULL;
            if (S.getProcessor())
                handler = S.getProcessor() -> getEncHandler(&handlerData);
            if (handler)
                handler -> close(handlerData, S.getProcessor(), cd -> physCD);
        }
        break;
    case ENC_ICONV:
#ifdef HAVE_ICONV_H
        iconv_close((iconv_t)(cd -> physCD));
#endif
        break;
    default: 
        // a descriptor with any other method should never get here
        assert(0);
    }
    return OK;
}

// Converts as much of the input as possible through the descriptor 'cd',
// dispatching on the back end recorded in cd->method.
//
//   inbuf / inbytesleft    in/out: unread input and its remaining size
//   outbuf / outbytesleft  in/out: free output space and its remaining size
//   result                 out: ENC_OK, ENC_EINVAL, ENC_E2BIG or ENC_EILSEQ
//
// The function itself always returns OK; the outcome of the conversion is
// reported through 'result', which is assigned on every path.
eFlag Recoder::conv(Sit S, CDesc cd, const char *& inbuf, size_t &inbytesleft, 
        char *& outbuf, size_t &outbytesleft, EncResult& result)
{
    assert(cd);
    switch(cd -> method)
    {
    case ENC_ICONV:
#ifdef HAVE_ICONV_H
        {
            errno = 0;
            size_t rc = iconv((iconv_t)(cd -> physCD), 
#               ifdef SABLOT_ICONV_CAST_OK
                    (char**)(&inbuf),
#               else
                    &inbuf,
#               endif
                    &inbytesleft, &outbuf, &outbytesleft);
            // iconv signals failure through its return value; errno is
            // only meaningful in that case
            if (rc != (size_t) -1)
                result = ENC_OK;
            else
            {
                switch(errno)
                {
                case E2BIG:
                    result = ENC_E2BIG; break;
                case EILSEQ:
                    result = ENC_EILSEQ; break;
                case EINVAL:
                default:
                    // any other failure is reported as an error as well,
                    // rather than being mistaken for success
                    result = ENC_EINVAL;
                };
            }
        };
#else
        assert(0);
        result = ENC_EINVAL;    // iconv descriptor in a build without iconv
#endif
        break;
    case ENC_INTERNAL:
        {
            result = 
                encInternalConv(cd -> physCD, &inbuf, &inbytesleft, 
                &outbuf, &outbytesleft);
        };
        break;
    case ENC_HANDLER:
        {
            void *enchlrUD = NULL;
            EncHandler *enchlr = NULL;
            if (S.getProcessor())
                enchlr = S.getProcessor() -> getEncHandler(&enchlrUD);
            if (enchlr)
            {
                switch(enchlr -> conv(enchlrUD, S.getProcessor(), cd -> physCD, 
                    &inbuf, &inbytesleft, &outbuf, &outbytesleft))
                {
                case EH_EINVAL: result = ENC_EINVAL;
                    break;
                case EH_E2BIG: result = ENC_E2BIG;
                    break;
                case EH_EILSEQ: result = ENC_EILSEQ;
                    break;
                default: result = ENC_OK;
                };
            }
            else
            {
                // no handler available: nothing was converted, so do not
                // leave 'result' unassigned
                result = ENC_EINVAL;
            }
        };
        break;
    default:
        assert(0);
        result = ENC_EINVAL;    // unknown conversion method
    };
    return OK;
}

// Forwards a message (type, code and two string arguments) to the
// situation object S.
void Recoder::report(Sit S, MsgType type, MsgCode code, const Str &arg1, const Str & arg2)
{
    S.message(type, code, arg1, arg2);
}


// Generated by  Doxygen 1.6.0   Back to index