#ifndef CHAR_H #define CHAR_H /* -------------------------------------------------------------------------- * Character set handling: * * Hugs follows Haskell 1.3 in assuming that input uses the ISO-8859-1 * (Latin-1) character set. The following code provides methods for * classifying input characters according to the lexical structure * specified by the report. Hugs should still accept older programs * because ASCII is just a subset of the Latin-1 character set. * * Extended to Unicode by Dimitry Golubovsky . * ------------------------------------------------------------------------*/ /* Possibly shorter version of Char for use in arrays. */ #if UNICODE_CHARS typedef Char ShortChar; #else typedef unsigned char ShortChar; #endif /* -------------------------------------------------------------------------- * Character classification and other primitives. * ------------------------------------------------------------------------*/ extern Bool charTabBuilt; extern unsigned char charTable[]; #if UNICODE_CHARS /* cf HS_CHAR_MAX in HsFFI.h */ #define MAXCHARVAL 0x10FFFF #else #define MAXCHARVAL (NUM_LAT1_CHARS-1) #endif #define isIn(c,x) (charTable[(unsigned char)(c)]&(x)) #define isLatin1(c) (0<=(c) && (c) # include # define MAX_CHAR_ENCODING MB_LEN_MAX #elif CHAR_ENCODING_UTF8 # define MAX_CHAR_ENCODING 6 #else # define MAX_CHAR_ENCODING 1 #endif #if CHAR_ENCODING extern int fputc_mb Args((Char, FILE *)); extern int fgetc_mb Args((FILE *)); extern Void addc_mb Args((Char, String *)); extern Char extc_mb Args((String *)); #define FPutChar(c,f) fputc_mb(c,f) #define FGetChar(f) fgetc_mb(f) #define AddChar(c,s) addc_mb(c,&s) #define ExtractChar(s) extc_mb(&s) #if CHAR_ENCODING_UTF8 #define charIsRepresentable(c) TRUE #else extern Bool charIsRepresentable Args((Char)); #endif #else /* !CHAR_ENCODING */ #define FPutChar(c,f) (fputc(c, f)) #define FGetChar(f) (getc(f)) #define AddChar(c,s) (*(s)++ = (c)) #define ExtractChar(s) (*(unsigned char *)(s)++) #define charIsRepresentable(c) isLatin1(c) #endif /* !CHAR_ENCODING */ #endif /* CHAR_H */