2.03.2011

mogmail – JIStoSJIS converter

mogmail - JIStoSJIS変換

・character_converter.h

   1: // Copyright (c) 2011 Mog Project. All rights reserved.
   2:  
   3: #ifndef _MOG_UTIL_CHARACTER_CONVERTER_H_
   4: #define _MOG_UTIL_CHARACTER_CONVERTER_H_
   5: #pragma once
   6:  
   7: #include <string>
   8:  
   9: namespace mog {
  10: namespace util {
  11:  
  12: // A class manages character-code converting.
  13: class CharacterConverter {
  14:  public:
  15:   // convert JIS(ISO-2022-JP) std::string to Shift-JIS
  16:   static void JIStoSJIS(std::string const& jis, std::string * sjis);
  17:   static std::string JIStoSJIS(std::string const& jis);
  18:  
  19:  private:
  20:   enum JISFlags { FLAG_SINGLE_BYTE, FLAG_DOUBLE_BYTE, };
  21:   struct JISEscapeSequences {
  22:     char const* sequence;
  23:     JISFlags    flag;
  24:   };
  25:   static JISEscapeSequences const kJISEscapeSequences[];
  26: };
  27:  
  28: }  // namespace util
  29: }  // namespace mog
  30: #endif  // _MOG_UTIL_CHARACTER_CONVERTER_H_

・character_converter.cc

   1: // Copyright (c) 2011 Mog Project. All rights reserved.
   2:  
   3: #include "character_converter.h"
   4: #include <mbstring.h>
   5: #include <boost/foreach.hpp>
   6:  
   7: namespace mog {
   8: namespace util {
   9:  
  10: ////////////////////////////////////////////////////////////////////////////////
  11: // CharacterConverter
  12: CharacterConverter::JISEscapeSequences const CharacterConverter::kJISEscapeSequences[] = {
  13:   { "\x1b\x28\x42", CharacterConverter::FLAG_SINGLE_BYTE },  // reg#06 ASCII
  14:   { "\x1b\x28\x4a", CharacterConverter::FLAG_SINGLE_BYTE },  // reg#14 JIS X 0201-Roman
  15:   { "\x1b\x24\x40", CharacterConverter::FLAG_DOUBLE_BYTE },  // reg#42 old JIS kanji(JIS C 6226-1978)
  16:   { "\x1b\x24\x42", CharacterConverter::FLAG_DOUBLE_BYTE },  // reg#87 new JIS kanji(JIS X 0208-1983)
  17: };
  18:  
  19: void CharacterConverter::JIStoSJIS(std::string const& jis, std::string * sjis) {
  20:   sjis->clear();
  21:   JISFlags current_flag = FLAG_SINGLE_BYTE;
  22:  
  23:   for (std::string::const_iterator it = jis.begin(); it != jis.end(); ++it) {
  24:     if ('\x1b' == *it) {
  25:       BOOST_FOREACH(JISEscapeSequences esc, kJISEscapeSequences ) {
  26:         std::string::const_iterator it_end = it + std::string(esc.sequence).size();
  27:         if (esc.sequence == std::string(it, it_end)) {
  28:           current_flag = esc.flag;
  29:           it = it_end - 1;
  30:           break;
  31:         }
  32:       }
  33:       continue;
  34:     }
  35:     if (FLAG_DOUBLE_BYTE == current_flag) {
  36:       union {
  37:         struct {
  38:           char high;
  39:           char low;
  40:         } byte;
  41:         unsigned int word;
  42:       } double_byte;
  43:  
  44:       double_byte.byte.low = *it;
  45:       double_byte.byte.high = *(++it);
  46:       double_byte.word = _mbcjistojms(double_byte.word);
  47:       sjis->push_back(double_byte.byte.low);
  48:       sjis->push_back(double_byte.byte.high);
  49:     } else {
  50:       sjis->push_back(*it);
  51:     }
  52:   }
  53: }
  54:  
  55: std::string CharacterConverter::JIStoSJIS(std::string const& jis) {
  56:   std::string out_value;
  57:   JIStoSJIS(jis, &out_value);
  58:   return out_value;
  59: }
  60:  
  61: }  // namespace util
  62: }  // namespace mog

0 件のコメント:

コメントを投稿