NAME

       Tcl_GetEncoding,   Tcl_FreeEncoding,   Tcl_ExternalToUtfD­
       String,    Tcl_ExternalToUtf,    Tcl_UtfToExternalDString,
       Tcl_UtfToExternal,  Tcl_WinTCharToUtf,  Tcl_WinUtfToTChar,
       Tcl_GetEncodingName, Tcl_SetSystemEncoding,  Tcl_GetEncod­
       ingNames,  Tcl_CreateEncoding,  Tcl_GetDefaultEncodingDir,
       Tcl_SetDefaultEncodingDir - procedures  for  creating  and
       using encodings.


SYNOPSIS

       #include <tcl.h>

       Tcl_Encoding
       Tcl_GetEncoding(interp, name)

       void
       Tcl_FreeEncoding(encoding)

       char *
       Tcl_ExternalToUtfDString(encoding, src, srcLen, dstPtr)

       int
       Tcl_ExternalToUtf(interp, encoding, src, srcLen, flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr,
            dstCharsPtr)

       char *
       Tcl_UtfToExternalDString(encoding, src, srcLen, dstPtr)

       int
       Tcl_UtfToExternal(interp, encoding, src, srcLen, flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr,
            dstCharsPtr)

       char *
       Tcl_WinTCharToUtf(tsrc, srcLen, dstPtr)

       TCHAR *
       Tcl_WinUtfToTChar(src, srcLen, dstPtr)

       char *
       Tcl_GetEncodingName(encoding)

       int
       Tcl_SetSystemEncoding(interp, name)

       void
       Tcl_GetEncodingNames(interp)

       Tcl_Encoding
       Tcl_CreateEncoding(typePtr)

       char *
       Tcl_SetDefaultEncodingDir(path)




ARGUMENTS

       Tcl_Interp          *interp        (in)      Interpreter
                                                    to  use   for
                                                    error report­
                                                    ing, or  NULL
                                                    if  no  error
                                                    reporting  is
                                                    desired.

       CONST char          *name          (in)      Name       of
                                                    encoding   to
                                                    load.

       Tcl_Encoding        encoding       (in)      The  encoding
                                                    to     query,
                                                    free,  or use
                                                    for  convert­
                                                    ing text.  If
                                                    encoding   is
                                                    NULL,     the
                                                    current  sys­
                                                    tem  encoding
                                                    is used.

       CONST char          *src           (in)      For       the
                                                    Tcl_ExternalToUtf
                                                    functions, an
                                                    array      of
                                                    bytes in  the
                                                    specified
                                                    encoding that
                                                    are   to   be
                                                    converted  to
                                                    UTF-8.    For
                                                    the
                                                    Tcl_UtfToEx­
                                                    ternal    and
                                                    Tcl_Win­
                                                    UtfToTChar
                                                    functions, an
                                                    array      of
                                                    UTF-8 charac­
                                                    ters  to   be
                                                    converted  to
                                                    the specified
                                                    encoding.

       CONST TCHAR         *tsrc          (in)      An  array  of
                                                    Windows TCHAR
                                                    characters to
                                                    convert    to
                                                    UTF-8.

       int                 srcLen         (in)      Length of src
                                                    or  tsrc   in
                                                    bytes.     If
                                                    the length is
                                                    negative, the
                                                    encoding-spe­
                                                    cific  length
                                                    of the string
                                                    is used.

       Tcl_DString         *dstPtr        (out)     Pointer to an
                                                    uninitialized
                                                    or       free
                                                    Tcl_DString
                                                    in  which the
                                                    converted
                                                    result   will
                                                    be stored.

       int                 flags          (in)      Various  flag
                                                    bits    OR-ed
                                                    together.
                                                    TCL_ENCOD­
                                                    ING_START
                                                    signifies
                                                    that      the
                                                    source buffer
                                                    is the  first
                                                    block   in  a
                                                    (potentially
                                                    multi-block)
                                                    input stream,
                                                    telling   the
                                                    conversion
                                                    routine    to
                                                    reset  to  an
                                                    initial state
                                                    and   perform
                                                    any  initial­
                                                    ization  that
                                                    needs      to
                                                    occur  before
                                                    the     first
                                                    byte is  con­
                                                    verted.
                                                    TCL_ENCOD­
                                                    ING_END  sig­
                                                    nifies   that
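                                                    the    source
                                                    buffer is the
                                                    last block in
                                                    a
                                                    (potentially
                                                    multi-block)
                                                    input stream,
                                                    telling   the
                                                    conversion
                                                    routine    to
                                                    perform   any
                                                    finalization
                                                    that needs to
                                                    occur   after
                                                    the last byte
                                                    is  converted
                                                    and  then  to
                                                    reset  to  an
                                                    initial
                                                    state.
                                                    TCL_ENCODING_STOPONERROR
                                                    signifies
                                                    that      the
                                                    conversion
                                                    routine
                                                    should return
                                                    immediately
                                                    upon  reading
                                                    a      source
                                                    character
                                                    that     does
                                                    not  exist in
                                                    the    target
                                                    encoding;
                                                    otherwise   a
                                                    default
                                                    fallback
                                                    character
                                                    will       be
                                                    substituted.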
                                            




NAME

       Tcl_GetEncoding,   Tcl_FreeEncoding,   Tcl_ExternalToUtfD­
       String,    Tcl_ExternalToUtf,    Tcl_UtfToExternalDString,
       Tcl_UtfToExternal,  Tcl_WinTCharToUtf,  Tcl_WinUtfToTChar,
       Tcl_GetEncodingName, Tcl_SetSystemEncoding,  Tcl_GetEncod­
       ingNames,  Tcl_CreateEncoding,  Tcl_GetDefaultEncodingDir,
       Tcl_SetDefaultEncodingDir - procedures  for  creating  and
       using encodings.


SYNOPSIS

       #include <tcl.h>

       Tcl_Encoding
       Tcl_GetEncoding(interp, name)

       void
       Tcl_FreeEncoding(encoding)

       char *
       Tcl_ExternalToUtfDString(encoding, src, srcLen, dstPtr)

       int
       Tcl_ExternalToUtf(interp, encoding, src, srcLen, flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr,
            dstCharsPtr)

       char *
       Tcl_UtfToExternalDString(encoding, src, srcLen, dstPtr)

       int
       Tcl_UtfToExternal(interp, encoding, src, srcLen, flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr,
            dstCharsPtr)

       char *
       Tcl_WinTCharToUtf(tsrc, srcLen, dstPtr)

       TCHAR *
       Tcl_WinUtfToTChar(src, srcLen, dstPtr)

       char *
       Tcl_GetEncodingName(encoding)

       int
       Tcl_SetSystemEncoding(interp, name)

       void
       Tcl_GetEncodingNames(interp)

       Tcl_Encoding
       Tcl_CreateEncoding(typePtr)

       char *
       Tcl_SetDefaultEncodingDir(path)




ARGUMENTS

       Tcl_Interp          *interp        (in)      Interpreter
                                                    to  use   for
                                                    error report­
                                                    ing, or  NULL
                                                    if  no  error
                                                    reporting  is
                                                    desired.

       CONST char          *name          (in)      Name       of
                                                    encoding   to
                                                    load.

       Tcl_Encoding        encoding       (in)      The  encoding
                                                    to     query,
                                                    free,  or use
                                                    for  convert­
                                                    ing text.  If
                                                    encoding   is
                                                    NULL,     the
                                                    current  sys­
                                                    tem  encoding
                                                    is used.

       CONST char          *src           (in)      For       the
                                                    Tcl_ExternalToUtf
                                                    functions, an
                                                    array      of
                                                    bytes in  the
                                                    specified
                                                    encoding that
                                                    are   to   be
                                                    converted  to
                                                    UTF-8.    For
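                                                    the
                                                    Tcl_UtfToExternal
                                                    and
                                                    Tcl_WinUtfToTChar
                                                    functions, an
                                                    array      of
                                                    UTF-8
                                                    characters to
                                                    be  converted
                                                    to        the
                                                    specified
                                                    encoding.

       Tcl_EncodingState   *statePtr      (in/out)  Used     when
                                                    converting  a
                                                    (generally
                                                    long       or
                                                    indefinite
                                                    length)  byte
                                                    stream  in  a
                                                    piece      by
                                                    piece
                                                    fashion.  The
                                                    conversion
                                                    routine
                                                    stores    its
                                                    current state
                                                    in  *statePtr
                                                    after     src
                                                    (the   buffer
                                                    containing
                                                    the   current
                                                    piece)    has
                                                    been
                                                    converted;
                                                    that    state
                                                    information
                                                    must       be
                                                    passed   back
                                                    when
                                                    converting
                                                    the      next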
                                                    piece of  the
                                                    stream so the
                                                    conversion
                                                    routine knows
                                                    what state it
                                                    was  in  when
                                                    it  left  off
                                                    at the end of
                                                    the      last
                                                    piece.    May
                                                    be  NULL,  in
                                                    which    case
                                                    the     value
                                                    specified for
                                                    flags      is
                                                    ignored   and
                                                    the    source
                                                    buffer     is
                                                    assumed    to
                                                    contain   the
                                                    complete
                                                    string     to
                                                    convert.

       char                *dst           (out)     Buffer     in
                                                    which     the
                                                    converted
                                                    result   will
                                                    be    stored.
                                                    No  more than
                                                    dstLen  bytes
                                                    will       be
                                                    stored     in
                                                    dst.

       int                 dstLen         (in)      The   maximum
                                                    length of the
                                                    output buffer
                                                    dst in bytes.

       int                 *srcReadPtr    (out)     Filled   with
                                                    the number of
                                                    bytes    from
                                                    src that were
                                                    actually con­
                                                    verted.  This
                                                    may  be  less
                                                    than      the
                                                    original
                                                    source length
                                                    if  there was
                                                    a     problem
                                                    converting
                                                    some   source
                                                    characters.
                                                    May be  NULL.

       int                 *dstWrotePtr   (out)     Filled   with
                                                    the number of
                                                    bytes    that
                                                    were actually
                                                    stored in the
                                                    output buffer
                                                    as  a  result
                                                    of  the  con­
                                                    version.  May
                                                    be NULL.

       int                 *dstCharsPtr   (out)     Filled   with
                                                    the number of
                                                    characters
                                                    that   corre­
                                                    spond to  the
                                                    number     of
                                                    bytes  stored
                                                    in the output
                                                    buffer.   May
                                                    be NULL.

       Tcl_EncodingType    *typePtr       (in)      Structure
                                                    that  defines
                                                    a new type of
                                                    encoding.

       char                *path          (in)      A path to the
                                                    location   of
                                                    the  encoding
                                                    file.
_________________________________________________________________


INTRODUCTION

       These  routines  convert  between Tcl's internal character
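       representation, UTF-8, and character representations  used
       by various operating systems  or  file  systems,  such  as
       Unicode,  ASCII, or Shift-JIS.  When operating on strings,
       such as obtaining the names  of  files  or  displaying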
       characters using international fonts, the strings must  be
       translated  into one or possibly multiple formats that the
       various system calls can use.  For instance, on a Japanese
       Unix  workstation,  a  user might obtain a filename repre­
       sented in the EUC-JP file encoding and then translate  the
       characters  to the jisx0208 font encoding in order to dis­
       play the filename in a Tk  widget.   The  purpose  of  the
       encoding  package  is  to help bridge the translation gap.
       UTF-8 provides an intermediate staging ground for all  the
       various  encodings.   In  the example above, text would be
       translated into UTF-8  from  whatever  file  encoding  the
       operating  system  is  using.  Then it would be translated
       from UTF-8 into whatever font encoding  the  display  rou­
       tines require.

       Some basic encodings are compiled into Tcl.  Others can be
       defined by the user or dynamically  loaded  from  encoding
       files in a platform-independent manner.


DESCRIPTION

       Tcl_GetEncoding  finds  an  encoding  given its name.  The
       name may refer to a builtin Tcl encoding,  a  user-defined
       encoding  registered  by  calling Tcl_CreateEncoding, or a
       dynamically-loadable encoding file.  The return value is a
       token that represents the encoding and can be used in sub­
       sequent calls to procedures such  as  Tcl_GetEncodingName,
       Tcl_FreeEncoding,  and Tcl_UtfToExternal.  If the name did
       not refer to any  known  or  loadable  encoding,  NULL  is
       returned and an error message is returned in interp.

       The encoding package maintains a database of all encodings
       currently in use.  The first time name is seen, Tcl_GetEn­
       coding  returns  an  encoding with a reference count of 1.
       If the same name is requested further times, then the ref­
       erence  count for that encoding is incremented without the
       overhead of allocating a new encoding and all its  associ­
       ated data structures.

       When  an  encoding  is  no longer needed, Tcl_FreeEncoding
       should be called to release it.  When an  encoding  is  no
       longer  in  use  anywhere (i.e., it has been freed as many
       times as it has been gotten) Tcl_FreeEncoding will release
       all  storage the encoding was using and delete it from the
       database.

       Tcl_ExternalToUtfDString converts a source buffer src from
       the  specified  encoding  into UTF-8.  The converted bytes
       are stored in dstPtr, which is then NULL terminated.   The
       caller  should eventually call Tcl_DStringFree to free any
       information stored in dstPtr.  When converting, if any  of
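       the  characters in the source buffer cannot be represented
       in the target encoding, a default fallback character  will
       be  used.   The  return  value  is  a pointer to the value
       stored in the DString.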

       Tcl_ExternalToUtf converts a source buffer  src  from  the
       specified  encoding  into  UTF-8.   Up to srcLen bytes are
       converted from the source buffer and  up  to  dstLen  con­
       verted bytes are stored in dst.  In all cases, *srcReadPtr
       is filled with the number of bytes that were  successfully
       converted  from  src  and  *dstWrotePtr is filled with the
       corresponding number of bytes that  were  stored  in  dst.
       The return value is one of the following:

              TCL_OK                       All  bytes of src were
                                           converted.

              TCL_CONVERT_NOSPACE          The destination buffer
                                           was  not  large enough
                                           for all  of  the  con­
                                           verted  data;  as many
                                           characters  as   could
                                           fit   were   converted
                                           though.

              TCL_CONVERT_MULTIBYTE        The last few bytes  in
                                           the source buffer were
                                           the  beginning  of   a
                                           multibyte    sequence,
                                           but  more  bytes  were
                                           needed   to   complete
                                           this sequence.  A sub­
                                           sequent  call  to  the
                                           conversion     routine
                                           should  pass  a buffer
                                           containing the  uncon­
                                           verted    bytes   that
                                           remained in  src  plus
                                           some   further   bytes
                                           from the source stream
                                           to   properly  convert
                                           the formerly  split-up
                                           multibyte sequence.

              TCL_CONVERT_SYNTAX           The source buffer con­
                                           tained   an    invalid
                                           character    sequence.
                                           This may occur if  the
                                           input  stream has been
                                           damaged  or   if   the
                                           input  encoding method
                                           was misidentified.

              TCL_CONVERT_UNKNOWN          The source buffer con­
                                           tained   a   character
                                           that  could   not   be
                                           represented   in   the
                                           target  encoding   and
                                           TCL_ENCODING_STOPON­
                                           ERROR was specified.

       Tcl_UtfToExternalDString converts a source buffer src from
       UTF-8  into  the  specified encoding.  The converted bytes
       are stored in dstPtr, which is then  terminated  with  the
       appropriate  encoding-specific  NULL.   The  caller should
       eventually call Tcl_DStringFree to  free  any  information
       stored  in dstPtr.  When converting, if any of the charac­
       ters in the source buffer cannot  be  represented  in  the
       target  encoding,  a  default  fallback  character will be
       used.  The return value is a pointer to the  value  stored
       in the DString.

       Tcl_UtfToExternal  converts a source buffer src from UTF-8
       into the specified encoding.  Up to srcLen bytes are  con­
       verted  from  the source buffer and up to dstLen converted
       bytes are stored in dst.  In  all  cases,  *srcReadPtr  is
       filled  with  the  number  of bytes that were successfully
       converted from src and *dstWrotePtr  is  filled  with  the
       corresponding  number  of  bytes  that were stored in dst.
       The return values are the same as the  return  values  for
       Tcl_ExternalToUtf.

       Tcl_WinUtfToTChar  and  Tcl_WinTCharToUtf are Windows-only
       convenience functions for  converting  between  UTF-8  and
       Windows strings.  On Windows 95 (as with the Macintosh and
       Unix operating systems), all strings exchanged between Tcl
       and the operating system are "char" based.  On Windows NT,
       some strings exchanged between Tcl and the operating  sys­
       tem  are  "char" oriented while others are in Unicode.  By
       convention, in Windows a TCHAR is a character in the  ANSI
       code page on Windows 95 and a Unicode character on Windows
       NT.

       If you planned to use the same "char" based interfaces  on
       both Windows 95 and Windows NT, you could use Tcl_UtfToEx­
       ternal and Tcl_ExternalToUtf (or their Tcl_DString equiva­
       lents) with an encoding of NULL (the current system encod­
       ing).  On the other hand, if you planned to use  the  Uni­
       code  interface  when running on Windows NT and the "char"
       interfaces when running on Windows 95, you would  have  to
       perform  the  following type of test over and over in your
       program (as represented in pseudo-code):
              if (running NT) {
                  encoding <- Tcl_GetEncoding("unicode");
                  nativeBuffer <- Tcl_UtfToExternal(encoding, utfBuffer);
                  Tcl_FreeEncoding(encoding);
              } else {
                  nativeBuffer <- Tcl_UtfToExternal(NULL, utfBuffer);
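              }
       Tcl_WinUtfToTChar  and  Tcl_WinTCharToUtf  automatically
       handle this test and use the proper encoding based on  the
       current operating system.  Tcl_WinUtfToTChar returns a
       pointer to a TCHAR string, and Tcl_WinTCharToUtf expects a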
       TCHAR string pointer as the src string.  Otherwise,  these
       functions  behave  identically to Tcl_UtfToExternalDString
       and Tcl_ExternalToUtfDString.

       Tcl_GetEncodingName is roughly the inverse  of  Tcl_GetEn­
       coding.   Given  an encoding, the return value is the name
       argument that was used to create the encoding.  The string
       returned by Tcl_GetEncodingName is only guaranteed to per­
       sist until the encoding is deleted.  The caller  must  not
       modify this string.

       Tcl_SetSystemEncoding   sets  the  default  encoding  that
       should be used whenever the user passes a NULL  value  for
       the  encoding  argument to any of the other encoding func­
       tions.  If name is NULL, the system encoding is  reset  to
       the  default system encoding, binary.  If the name did not
       refer to any known  or  loadable  encoding,  TCL_ERROR  is
       returned  and  an error message is left in interp.  Other­
       wise, this procedure increments the reference count of the
       new system encoding, decrements the reference count of the
       old system encoding, and returns TCL_OK.

       Tcl_GetEncodingNames sets the interp result to a list con­
       sisting  of  the  names of all the encodings that are cur­
       rently defined or can be dynamically loaded, searching the
       encoding   path  specified  by  Tcl_SetDefaultEncodingDir.
       This procedure does not ensure that the  dynamically-load­
       able  encoding  files  contain valid data, but merely that
       they exist.

       Tcl_CreateEncoding defines a new  encoding  and  registers
       the  C  procedures that are called back to convert between
       the encoding and UTF-8.   Encodings  created  by  Tcl_Cre­
       ateEncoding are thereafter visible in the database used by
       Tcl_GetEncoding.  Just as with the Tcl_GetEncoding  proce­
       dure,  the  return  value  is  a token that represents the
       encoding and can be used  in  subsequent  calls  to  other
       encoding  functions.  Tcl_CreateEncoding returns an encod­
       ing with a reference count of 1. If an encoding  with  the
       specified  name  already  exists,  then  its  entry in the
       database is replaced with the new encoding; the token  for
       the  old encoding will remain valid and continue to behave
       as before, but users of the new token will  now  call  the
       new encoding procedures.

       The typePtr argument to Tcl_CreateEncoding contains infor­
       mation about the name of the encoding and  the  procedures
       that  will  be called to convert between this encoding and
       UTF-8.  It is defined as follows:

              typedef struct Tcl_EncodingType {
                CONST char *encodingName;
                Tcl_EncodingConvertProc *toUtfProc;
                Tcl_EncodingConvertProc *fromUtfProc;
                Tcl_EncodingFreeProc *freeProc;
                ClientData clientData;
                int nullSize;
              } Tcl_EncodingType;

       The encodingName provides a string name for the  encoding,
       by which it can be referred to in other procedures such as
       Tcl_GetEncoding.  The toUtfProc refers to a callback  pro­
       cedure  to  invoke to convert text from this encoding into
       UTF-8.  The fromUtfProc refers to a callback procedure  to
       invoke to convert text from UTF-8 into this encoding.  The
       freeProc refers to a callback  procedure  to  invoke  when
       this encoding is deleted.  The freeProc field may be NULL.
       The clientData contains an arbitrary one-word value passed
       to  toUtfProc, fromUtfProc, and freeProc whenever they are
       called.  Typically, this is a pointer to a data  structure
       containing  encoding-specific information that can be used
       by the callback procedures.  For instance, two very  simi­
       lar  encodings such as ascii and macRoman may use the same
       callback procedure, but use different values of clientData
       to  control its behavior.  The nullSize specifies the num­
       ber of zero  bytes  that  signify  end-of-string  in  this
       encoding.   It  must  be  1 (for single-byte or multi-byte
       encodings like ASCII or Shift-JIS) or 2  (for  double-byte
       encodings  like Unicode).  Constant-sized encodings with 3
       or more bytes per character (such  as  CNS11643)  are  not
       accepted.

       The  callback  procedures toUtfProc and fromUtfProc should
       match the type Tcl_EncodingConvertProc:

              typedef int Tcl_EncodingConvertProc(
                ClientData clientData,
                CONST char *src,
                int srcLen,
                int flags,
                Tcl_EncodingState *statePtr,
                char *dst,
                int dstLen,
                int *srcReadPtr,
                int *dstWrotePtr,
                int *dstCharsPtr);

       The toUtfProc and fromUtfProc procedures are called by the
       Tcl_ExternalToUtf or Tcl_UtfToExternal family of functions
       to perform the actual conversion.  The clientData  parame­
       ter  to  these  procedures  is  the same as the clientData
       field specified to Tcl_CreateEncoding  when  the  encoding
       was created.  The remaining arguments to the callback pro­
       cedures are the same as the arguments, documented  at  the
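       top,  to  Tcl_ExternalToUtf or Tcl_UtfToExternal, with the
       following exceptions.  If the srcLen argument  to  one  of
       those  high-level  functions is negative, the value passed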
       to the callback procedure will be the  appropriate  encod­
       ing-specific   string  length  of  src.   If  any  of  the
       srcReadPtr, dstWrotePtr, or dstCharsPtr arguments  to  one
       of  the  high-level  functions  is NULL, the corresponding
       value passed to the callback procedure will be a  non-NULL
       location.

       The callback procedure freeProc, if non-NULL, should match
       the type Tcl_EncodingFreeProc:
              typedef void Tcl_EncodingFreeProc(
                ClientData clientData);

       This freeProc function is  called  when  the  encoding  is
       deleted.   The  clientData  parameter  is  the same as the
       clientData field specified to Tcl_CreateEncoding when  the
       encoding was created.


       Tcl_GetDefaultEncodingDir   and  Tcl_SetDefaultEncodingDir
       access and set the directory  to  use  when  locating  the
       default  encoding  files.   If this value is not NULL, the
       TclpInitLibraryPath routine appends the path to  the  head
       of  the search path, and uses this path as the first place
       to look into when trying to locate the encoding file.



ENCODING FILES

       Space would prohibit precompiling into Tcl every  possible
       encoding  algorithm,  so many encodings are stored on disk
       as dynamically-loadable  encoding  files.   This  behavior
       also  allows  the user to create additional encoding files
       that can be loaded using the same mechanism.  These encod­
       ing  files  contain  information  about  the tables and/or
       escape sequences used to map between an external  encoding
       and Unicode.  The external encoding may consist of single-
       byte, multi-byte, or double-byte characters.

       Each dynamically-loadable encoding  is  represented  as  a
       text file.  The initial line of the file, beginning with a
       ``#'' symbol, is a comment that provides a  human-readable
       description  of  the  file.   The next line identifies the
       type of encoding file.  It can be  one  of  the  following
       letters:

       [1]   S
              A  single-byte  encoding,  where  one  character is
              always one byte long in the encoding.   An  example
              is iso8859-1, used by many European languages.

       [2]   D
              A  double-byte  encoding,  where  one  character is
              always two bytes long in the encoding.  An  example
              is big5, used for Chinese text.

       [3]   M
              A  multi-byte  encoding, where one character may be
              either one or two bytes long.  Certain  bytes  are
              lead  bytes,  indicating  that  another  byte  must
              follow and that together the  two  bytes  represent  one
              character.  Other bytes are not lead bytes and rep­
              resent themselves.  An example is shiftjis, used by
              many Japanese computers.

       [4]   E
              An  escape-sequence  encoding, specifying that cer­
              tain sequences of bytes do  not  represent  charac­
              ters,  but  commands  that  describe  how following
              bytes should be interpreted.

       The rest of the lines in the file depend on the type.

       Cases [1], [2], and [3] are collectively  referred  to  as
       table-based  encoding  files.   The lines in a table-based
       encoding file are in the same format as this example taken
       from  the  shiftjis  encoding  (this  is  not the complete
       file):
              # Encoding file: shiftjis, multi-byte
              M
              003F 0 40
              00
              0000000100020003000400050006000700080009000A000B000C000D000E000F
              0010001100120013001400150016001700180019001A001B001C001D001E001F
              0020002100220023002400250026002700280029002A002B002C002D002E002F
              0030003100320033003400350036003700380039003A003B003C003D003E003F
              0040004100420043004400450046004700480049004A004B004C004D004E004F
              0050005100520053005400550056005700580059005A005B005C005D005E005F
              0060006100620063006400650066006700680069006A006B006C006D006E006F
              0070007100720073007400750076007700780079007A007B007C007D203E007F
              0080000000000000000000000000000000000000000000000000000000000000
              0000000000000000000000000000000000000000000000000000000000000000
              0000FF61FF62FF63FF64FF65FF66FF67FF68FF69FF6AFF6BFF6CFF6DFF6EFF6F
              FF70FF71FF72FF73FF74FF75FF76FF77FF78FF79FF7AFF7BFF7CFF7DFF7EFF7F
              FF80FF81FF82FF83FF84FF85FF86FF87FF88FF89FF8AFF8BFF8CFF8DFF8EFF8F
              FF90FF91FF92FF93FF94FF95FF96FF97FF98FF99FF9AFF9BFF9CFF9DFF9EFF9F
              0000000000000000000000000000000000000000000000000000000000000000
              0000000000000000000000000000000000000000000000000000000000000000
              81
              0000000000000000000000000000000000000000000000000000000000000000
              0000000000000000000000000000000000000000000000000000000000000000
              0000000000000000000000000000000000000000000000000000000000000000
              0000000000000000000000000000000000000000000000000000000000000000
              300030013002FF0CFF0E30FBFF1AFF1BFF1FFF01309B309C00B4FF4000A8FF3E
              FFE3FF3F30FD30FE309D309E30034EDD30053006300730FC20152010FF0F005C
              301C2016FF5C2026202520182019201C201DFF08FF0930143015FF3BFF3DFF5B
              FF5D30083009300A300B300C300D300E300F30103011FF0B221200B100D70000
              00F7FF1D2260FF1CFF1E22662267221E22342642264000B0203220332103FFE5
              FF0400A200A3FF05FF03FF06FF0AFF2000A72606260525CB25CF25CE25C725C6
              000000000000000000000000000000002227222800AC21D221D4220022030000
              0000000000000000000000000000000000000000222022A52312220222072261
              2252226A226B221A223D221D2235222B222C0000000000000000000000000000
              212B2030266F266D266A2020202100B6000000000000000025EF000000000000

       The third line of the file is three  numbers.   The  first
       number  is the fallback character (in base 16) to use when
       converting from UTF-8 to this encoding.  The second number
       is  a  1 if this file represents the encoding for a symbol
       font, or 0 otherwise.  The last number (in base 10) is how
       many pages of data follow.

       Subsequent  lines  in  the  example  above  are pages that
       describe how to map from the encoding into 2-byte Unicode.
       The first line in a page identifies the page number.  Fol­
       lowing it are 256 double-byte numbers, arranged as 16 rows
       of  16  numbers.   Given  a character in the encoding, the
       high byte of that character is used to select which  page,
       and  the low byte of that character is used as an index to
       select one of the double-byte numbers in that page  -  the
       value  obtained being the corresponding Unicode character.
       By examination of the example above, one can see that  the
       characters  0x7E  and  0x8163  in shiftjis map to 203E and
       2026 in Unicode, respectively.

       Following the first page will be all the other pages, each
       in  the  same  format as the first: one number identifying
       the page followed by 256 double-byte  Unicode  characters.
       If a character in the encoding maps to the Unicode charac­
       ter 0000, it means that  the  character  doesn't  actually
       exist.   If  all  characters  on a page would map to 0000,
       that page can be omitted.

       Case [4] is the escape-sequence encoding file.  The  lines
       in this type of file are in the same  format  as  this
       example taken from the iso2022-jp encoding:
              # Encoding file: iso2022-jp, escape-driven
              E
              init           {}
              final          {}
              iso8859-1      \x1b(B
              jis0201        \x1b(J
              jis0208        \x1b$@
              jis0208        \x1b$B
              jis0212        \x1b$(D
              gb2312         \x1b$A
              ksc5601        \x1b$(C

       In the file, the first column represents an option and the
       second  column  is the associated value.  init is a string
       to emit or expect before the first character is converted,
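       while  final  is a string to emit or expect after the last
       character.  All other options  are  names  of  table-based
       encodings;  the  associated  value  is the escape sequence
       that marks that encoding.  Tcl  syntax  is  used  for  the
       values; in the above example, for instance, ``{}'' represents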
       the empty string and ``\x1b'' represents character 27.

       When Tcl_GetEncoding encounters an encoding name that  has
       not  been  loaded,  it  attempts  to load an encoding file
       called name.enc from the  encoding  subdirectory  of  each
       directory  specified in the library path $tcl_libPath.  If
       the encoding file exists, but is malformed, an error  mes­
       sage will be left in interp.


KEYWORDS

       utf, encoding, convert