/* * charconv_kanji.c * * Example of a Kanji character-conversion callout for MX V5.2 * and V5.3. * Copyright (c) 2008, Matthew Madison. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the copyright owner nor the names of any other contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * MODULE DESCRIPTION: * * This module contains routines that implement character * conversion for use with MX's VMS MAIL interface. You should * use this module to provide appropriate conversion between the * local native character set and a standard Internet character * set as used in e-mail messages. * * See MX_EXAMPLES_DIR:CHARCONV_EXAMPLE_README.TXT for more * information. 
* * NOTE: This module was written for recent versions of DEC C * and OpenVMS. You may need to modify the code if you * are using the VAX C compiler and/or have an older version * of the operating system. * */
#include
#include
#include
#include
#include
#include
#include
#define __NEW_STARLET
#include
#include
#include
#include
#include
#include
#include

/* True when a VMS condition value indicates success. */
#define OK(x) $VMS_STATUS_SUCCESS(x)

/*
 * Local type definitions
 */

/* VMS condition value, as returned by every callout in this module. */
typedef u_int32_t vms_status_t;

/* Conversion direction codes accepted by INIT. */
typedef enum {
    CHARCONV__LOCAL_TO_NETWORK = 1,
    CHARCONV__NETWORK_TO_LOCAL = 2
} conv_code_t;

/* Selects which character set name GETCSNAME returns. */
typedef enum {
    CHARCONV__LOCAL_CHARSET = 1,
    CHARCONV__NETWORK_CHARSET = 2
} charset_code_t;

/* Boolean as exchanged with the caller (described as a BLISS boolean
 * in the CONVERT parameter list). */
typedef enum {
    BBOOL_FALSE = 0,
    BBOOL_TRUE = 1
} bbool_t;

/* Shift state used by CONVERT's built-in EUC-JP <-> ISO-2022-JP code:
 * STATE_ASCII   - single-byte text
 * STATE_KANJI   - in two-byte mode, next byte is the first of a pair
 * STATE_KANJI_2 - in two-byte mode, next byte is the second of a pair */
typedef enum {
    STATE_ASCII, STATE_KANJI, STATE_KANJI_2
} cs_state_t;

/* Size of a character set name buffer, including the terminating NUL. */
#define CTX_S_CSNAME 128
/* Size of the intermediate conversion buffer that INIT allocates
 * directly behind the context block. */
#define CTX_S_BUFFER 65536

/* Per-conversion context created by INIT and released by FINISH. */
typedef struct {
    conv_code_t ctx_direction;           /* direction requested in INIT */
    char *ctx_bufptr;                    /* intermediate buffer (CTX_S_BUFFER bytes) */
    bbool_t ctx_use_iconv;               /* BBOOL_TRUE: convert via iconv; else built-in code */
    iconv_t ctx_cd;                      /* iconv descriptor; opened only when ctx_use_iconv is set */
    char ctx_lcsname[CTX_S_CSNAME];      /* local character set name */
    char ctx_ncsname[CTX_S_CSNAME];      /* network character set name */
} context_t;

/*
 * Forward declarations
 */
vms_status_t INIT(const conv_code_t *code, void **ctxptr, u_int16_t *lclcslen, struct dsc$descriptor *lclcs, const struct dsc$descriptor *netcs, const struct dsc$descriptor *usrnam);
vms_status_t CONVERT(void **ctxptr, const struct dsc$descriptor *instr, u_int16_t *outlen, struct dsc$descriptor *outstr, bbool_t *converted, u_int16_t *remain);
vms_status_t GETCSNAME(void **ctxptr, const charset_code_t *code, u_int16_t *len, struct dsc$descriptor *name);
vms_status_t FINISH(void **ctxptr);
static void get_local_charset(char csname[CTX_S_CSNAME], const struct dsc$descriptor *usrnam);

/*
 * Local static storage
 */

/* Size of the context block; INIT and FINISH add CTX_S_BUFFER to it to
 * get the size of the single allocation they request and release. */
static int32_t context_size = sizeof(context_t);

/*
 * External references
 *
 * File access routines defined outside this module; they are used by
 * get_local_charset to read the per-user character set file.
 */
#define MX__FILE_READ 1
vms_status_t MX_FILE_OPEN(u_int32_t code, const struct dsc$descriptor *fspec, ...);
vms_status_t MX_FILE_READ(u_int32_t ctx, struct dsc$descriptor *dsc, ...);
vms_status_t MX_FILE_CLOSE(u_int32_t ctx);

/* * ROUTINE: INIT * * DESCRIPTION: * Initializes
for a character conversion. * * This routine should fill in the local character set name even * if it returns a failure status. * * If the netcs parameter is NULL, the caller is expecting * conversion between the local character set and the default * network character set, or vice-versa. If the netcs parameter * is non-NULL, the conversion is between a character set * explicitly named in a MIME header and the local character set. * * In MX V5.3, the usrnam parameter points to a character string * with the username of the local user, so that character set * customization may be performed on a per-user basis. * * PARAMETERS: * code: conv_code_t, read only, by reference * ctxptr: address of context, write only, by reference * lclcslen: word_unsigned, write only, by reference * lclcs: char_string, write only, by descriptor * netcs: char_string, read only, by descriptor (optional) * usrnam: char_string, read only, by descriptor (optional) * * RETURNS: VMS condition value * SS$_NORMAL: initialization successful; ready for conversion * SS$_BADPARAM: invalid parameter, or: * - local and network character sets are identical, no conversion needed * - no conversion available for the specified character set */ vms_status_t INIT (const conv_code_t *code, void **ctxptr, u_int16_t *lclcslen, struct dsc$descriptor *lclcs, const struct dsc$descriptor *netcs, const struct dsc$descriptor *usrnam_optional_arg__) { vms_status_t status; char lbuf[65], nbuf[65], *cp; const char *ptr; int32_t ctxsize; u_int16_t len; context_t ctx, *ctxp; const struct dsc$descriptor *usrnam; unsigned int argc; if (*code != CHARCONV__LOCAL_TO_NETWORK && *code != CHARCONV__NETWORK_TO_LOCAL) return SS$_BADPARAM; va_count(argc); usrnam = (argc > 5 ? 
usrnam_optional_arg__ : 0); memset(&ctx, 0, sizeof(ctx)); ctx.ctx_direction = *code; get_local_charset(ctx.ctx_lcsname, usrnam); cp = strchr(ctx.ctx_lcsname, '='); if (0 == cp) strcpy(lbuf, ctx.ctx_lcsname); else { *cp++ = '\0'; strcpy(lbuf, cp); } /* * Always fill in the local character set name */ len = strlen(ctx.ctx_lcsname); if (lclcslen != 0) *lclcslen = len; if (lclcs != 0) LIB$SCOPY_R_DX(&len, ctx.ctx_lcsname, lclcs); /* * If a network character set name was provided, * use that instead of the default */ if (netcs != 0) { len = netcs->dsc$w_length; ptr = netcs->dsc$a_pointer; if (len > CTX_S_CSNAME-1) len = CTX_S_CSNAME-1; memcpy(ctx.ctx_ncsname, ptr, len); ctx.ctx_ncsname[len] = '\0'; } else strcpy(ctx.ctx_ncsname, "ISO-2022-JP"); cp = strchr(ctx.ctx_ncsname, '='); if (0 == cp) strcpy(nbuf, ctx.ctx_ncsname); else { *cp++ = '\0'; strcpy(nbuf, cp); } /* * If the character set names are identical, no conversion * needed. */ if (strcasecmp(ctx.ctx_lcsname, ctx.ctx_ncsname) == 0) return SS$_BADPARAM; /* no translation needed */ /* * Try using C character conversion support if it's not * the charset mapping we handle internally */ if (strcasecmp(ctx.ctx_ncsname, "ISO-2022-JP") == 0 && strcasecmp(ctx.ctx_lcsname, "EUC-JP") == 0) { ctx.ctx_use_iconv = BBOOL_FALSE; } else { if (*code == CHARCONV__LOCAL_TO_NETWORK) ctx.ctx_cd = iconv_open(nbuf, lbuf); else ctx.ctx_cd = iconv_open(lbuf, nbuf); if (ctx.ctx_cd == (iconv_t)(-1)) return SS$_BADPARAM; ctx.ctx_use_iconv = BBOOL_TRUE; } /* * If we are successful to this point, allocate and * initialize a context block and conversion buffer. */ ctxsize = context_size + CTX_S_BUFFER; status = LIB$GET_VM(&ctxsize, &ctxp); if (!OK(status)) { if (ctx.ctx_use_iconv) iconv_close(ctx.ctx_cd); return status; } memcpy(ctxp, &ctx, context_size); ctxp->ctx_bufptr = (void *) (ctxp + 1); *ctxptr = ctxp; return SS$_NORMAL; } /* INIT */ /* * ROUTINE: CONVERT * * DESCRIPTION: * Converts a character string. 
* * PARAMETERS: * ctxptr: context pointer, modify, by reference * instr: char_string, read only, by descriptor * outlen: word_unsigned, write only, by reference * outstr: char_string, write only, by descriptor * converted: BLISS boolean value, write only, by reference * remain: word_unsigned, write only, by reference * * RETURNS: VMS condition value */ vms_status_t CONVERT (void **ctxptr, const struct dsc$descriptor *instr, u_int16_t *outlen, struct dsc$descriptor *outstr, bbool_t *converted, u_int16_t *remain) { context_t *ctx = *ctxptr; char *inp; char *outp; size_t inlen, inremain, outmax, outremain; bbool_t did_one; cs_state_t state; /* * The input and output descriptors will always be either * CLASS_S or CLASS_D. * * If the output descriptor is CLASS_D (dynamic string descriptor), * we perform the conversion into our intermediate buffer, then * copy the result to the output string using LIB$SCOPY_R_DX. * Otherwise, we can perform the conversion directly into the * static string provided by the caller. 
*/ inp = instr->dsc$a_pointer; inlen = instr->dsc$w_length; if (outstr->dsc$b_class == DSC$K_CLASS_D) { outp = ctx->ctx_bufptr; outmax = CTX_S_BUFFER; } else { outp = outstr->dsc$a_pointer; outmax = outstr->dsc$w_length; } did_one = BBOOL_FALSE; inremain = inlen; outremain = outmax; if (ctx->ctx_use_iconv) { size_t result; while (inremain > 0 && outremain > 0) { size_t inremain_in = inremain; result = iconv(ctx->ctx_cd, &inp, &inremain, &outp, &outremain); if (result != (size_t) -1) { if (result < inremain_in) did_one = BBOOL_TRUE; } else if (errno != EILSEQ) break; if (inremain == 0 || outremain == 0) break; *outp++ = *inp++; inremain--; outremain--; } } else { /* * Internal conversion between EUC-JP and ISO-2022-JP */ if (ctx->ctx_direction == CHARCONV__NETWORK_TO_LOCAL) { state = STATE_ASCII; while (inremain > 0 && outremain > 0) { /* * Look for a state-shift sequence: * * ESC ( B -> ASCII * ESC ( J -> JIS X 0201-1976 (Romanji, emulated as ASCII) * ESC $ @ -> JIS X 0208-1978 * ESC $ B -> JIS X 0208-1983 */ if (inremain > 2 && *inp == '\033') { switch (*(inp+1)) { case '(': if (*(inp+2) == 'B' || *(inp+2) == 'J') { state = STATE_ASCII; inp += 3; inremain -= 3; continue; } break; case '$': if (*(inp+2) == '@' || *(inp+2) == 'B') { state = STATE_KANJI; inp += 3; inremain -= 3; did_one = BBOOL_TRUE; continue; } break; default: break; } } switch (state) { case STATE_ASCII: *outp++ = *inp++; break; case STATE_KANJI: if (*inp < ' ') *outp++ = *inp++; else { state = STATE_KANJI_2; *outp++ = *inp++ | 0x80; } break; case STATE_KANJI_2: state = STATE_KANJI; *outp++ = *inp++ | 0x80; break; } /* switch */ outremain -= 1; inremain -= 1; } /* while */ } else { /* local-to-network */ state = STATE_ASCII; while (inremain > 0 && outremain > 0) { if (state == STATE_ASCII && *inp < 0) { did_one = BBOOL_TRUE; state = STATE_KANJI; if (outremain > 3) { *outp++ = '\033'; *outp++ = '$'; *outp++ = 'B'; outremain -= 3; } } else if (state == STATE_KANJI && *inp >= 0) { state = STATE_ASCII; 
if (outremain > 3) { *outp++ = '\033'; *outp++ = '('; *outp++ = 'B'; outremain -= 3; } } *outp++ = *inp++ & 0x7F; inremain -= 1; outremain -= 1; } /* while */ if (state == STATE_KANJI && outremain >= 3) { *outp++ = '\033'; *outp++ = '('; *outp++ = 'B'; outremain -= 3; } } /* direction */ } /* iconv vs. internal conversion */ /* * Return the actual length of the output string. */ if (outlen != 0) *outlen = outmax - outremain; /* * Let the caller know whether or not any of the characters * were actually converted. */ if (converted != 0) *converted = did_one; /* * Let the caller know how many characters in the input string * were NOT converted. */ if (remain != 0) *remain = inremain; /* * If the output string was dynamic, copy to it from our * conversion buffer. */ if (outstr->dsc$b_class == DSC$K_CLASS_D) { u_int16_t len = outmax - outremain; return LIB$SCOPY_R_DX (&len, ctx->ctx_bufptr, outstr); } else return SS$_NORMAL; } /* CONVERT */ /* * ROUTINE: GETCSNAME * * DESCRIPTION: * Returns a character set name. * * PARAMETERS: * ctxptr: address of context, write only, by reference * code: charset_code_t, read only, by reference * xlen: word_unsigned, write only, by reference * name: char_string, write only, by descriptor * * RETURNS: VMS condition value */ vms_status_t GETCSNAME (void **ctxptr, const charset_code_t *code, u_int16_t *xlen, struct dsc$descriptor *name) { context_t *ctx = *ctxptr; u_int16_t len; char *namep; vms_status_t status; switch (*code) { default: return SS$_BADPARAM; break; case CHARCONV__LOCAL_CHARSET: len = strlen(ctx->ctx_lcsname); namep = ctx->ctx_lcsname; break; case CHARCONV__NETWORK_CHARSET: len = strlen(ctx->ctx_ncsname); namep = ctx->ctx_ncsname; break; } status = LIB$SCOPY_R_DX (&len, namep, name); if (OK(status) && xlen != 0) *xlen = len; return status; } /* GETCSNAME */ /* * ROUTINE: FINISH * * DESCRIPTION: * Cleans up after a conversion sequence. 
* * PARAMETERS: * ctxptr: address of context, write only, by reference * * RETURNS: VMS condition value */ vms_status_t FINISH (void **ctxptr) { context_t *ctx = *ctxptr; int32_t ctxsize = context_size + CTX_S_BUFFER; if (ctx->ctx_use_iconv) iconv_close(ctx->ctx_cd); LIB$FREE_VM(&ctxsize, &ctx); *ctxptr = 0; return SS$_NORMAL; } /* FINISH */ /* * ROUTINE: get_local_charset * * DESCRIPTION: * Gets the name of the local character set. By default, * this is "EUC-JP". A user may select a different character * set by creating a the file MX_LOCAL_CHARSET.DAT in his/her * login directory. This file should contain exactly one line * with the name of the desired character set. * * PARAMETERS: * csname: char_string, write only, by reference * usrnam: char_string, read only, by descriptor * * RETURNS: void */ static void get_local_charset (char csname[CTX_S_CSNAME], const struct dsc$descriptor *usrnam) { struct item_list_3 { u_int16_t bufsiz; u_int16_t itmcod; void *bufadr; u_int16_t *retlen; } itmlst[3]; struct dsc$descriptor dsc; u_int16_t retlen, unlen; u_int32_t fctx; vms_status_t status; char *unptr, myuname[33], filename[256]; /* * Start with the default */ strcpy(csname, "EUC-JP"); if (0 == usrnam || !OK(LIB$ANALYZE_SDESC((void *) usrnam, &unlen, &unptr))) return; itmlst[0].bufsiz = sizeof(myuname)-1; itmlst[0].itmcod = JPI$_USERNAME; itmlst[0].bufadr = myuname; itmlst[0].retlen = &retlen; itmlst[1].bufsiz = itmlst[1].itmcod = 0; if (!OK(SYS$GETJPIW(0, 0, 0, itmlst, 0, 0, 0))) return; while (retlen > 0 && myuname[retlen-1] == ' ') retlen--; myuname[retlen] = '\0'; if (unlen == retlen && strncasecmp(unptr, myuname, retlen) == 0) strcpy(filename, "SYS$LOGIN:MX_LOCAL_CHARSET.DAT"); else { char devnam[256], dirnam[256]; uint16_t devnamlen, dirnamlen; itmlst[0].bufsiz = sizeof(devnam)-1; itmlst[0].itmcod = UAI$_DEFDEV; itmlst[0].bufadr = devnam; itmlst[0].retlen = &devnamlen; itmlst[1].bufsiz = sizeof(dirnam)-1; itmlst[1].itmcod = UAI$_DEFDIR; itmlst[1].bufadr = dirnam; 
itmlst[1].retlen = &dirnamlen; itmlst[2].bufsiz = itmlst[2].itmcod = 0; if (!OK(SYS$GETUAI(0, 0, (void *) usrnam, itmlst, 0, 0, 0))) return; strncpy(filename, devnam, devnamlen); strncpy(filename+devnamlen, dirnam, dirnamlen); strcpy(filename+devnamlen+dirnamlen, "MX_LOCAL_CHARSET.DAT"); } dsc.dsc$b_dtype = DSC$K_DTYPE_T; dsc.dsc$b_class = DSC$K_CLASS_S; dsc.dsc$w_length = strlen(filename); dsc.dsc$a_pointer = filename; if (!OK(MX_FILE_OPEN(MX__FILE_READ, &dsc, &fctx))) return; dsc.dsc$w_length = CTX_S_CSNAME-1; dsc.dsc$a_pointer = filename; status = MX_FILE_READ(fctx, &dsc, &retlen); MX_FILE_CLOSE(fctx); if (OK(status)) { strncpy(csname, filename, retlen); csname[retlen] = '\0'; } } /* get_local_charset */