16進バイト列を10進数文字列に変換する

8 バイトの数値を 10 進文字列に変換するなら glibc の sprintf に書式文字列 "%llu" を与えて得られることはわかったんだけれど、任意長のバイト列を 10 進数文字列に変換するって、できるかな？というチャレンジが心をよぎったのでやってみた。

#include <vector>
#include <iostream>
#include <iterator>

typedef std::vector<bool> bits;
typedef std::vector<int> nums;

// バイト列をビット列に変換する
// バイト列はリトルエンディアン、つまり大きい桁から順に格納されているものとみなす。
// 後にでてくる convert 関数の条件分岐を減らすために、冒頭に 0 を意味する false を入れている。
static bits from(const unsigned char *const begin, const unsigned char *const end) {
  bits bs;
  bs.push_back(false);
  for (const unsigned char *p = begin; p != end; ++p) {
    bs.push_back(*p & 0x80);
    bs.push_back(*p & 0x40);
    bs.push_back(*p & 0x20);
    bs.push_back(*p & 0x10);
    bs.push_back(*p & 0x08);
    bs.push_back(*p & 0x04);
    bs.push_back(*p & 0x02);
    bs.push_back(*p & 0x01);
  }
  return bs;
}

// 10 進数字配列であらわされる値を二倍した数字配列を返す
// 配列の添字 0 が一桁目、添字 1 が二桁目、…というように 10 進数をあらわす。
static nums dbl(nums ns) {
  for(size_t i = 0; i < ns.size(); ++i)
    ns[i] *= 2;
  for(size_t i = 0; i < ns.size(); ++i)
    if (ns[i] >= 10) { ++ns[i + 1]; ns[i] -= 10; }
  return ns;
}

// 10 進数字配列であらわされる値を 1 増やした数字配列を返す
static nums inc(nums ns) {
  ++ns[0];
  for(size_t i = 0; i < ns.size(); ++i)
    if (ns[i] >= 10) { ++ns[i + 1]; ns[i] -= 10; }
  return ns;
}

// ビット列から数字配列に変換する
static nums convert(bits bs) {
  nums ns((bs.size() - 1) / 3); // 多めに桁を確保する
  for(bits::iterator it = bs.begin(); it != bs.end(); ++it) {
    ns = dbl(ns);
    if (*it) ns = inc(ns);
  }

template<typename T, size_t N> inline T* end_(T (&a)[N]) { return a + N; }

int main() {
  // ULONG_MAX ＋ 1
  unsigned char hex[] = {0x01,0x0,0x0,0x0,0x0};

  // 変換する
  nums ns = convert(from(hex, end_(hex)));

  // 数字配列を、添字逆順に標準出力にコピーする
  std::copy(ns.rbegin(), ns.rend(), std::ostream_iterator<int>(std::cout));
  std::cout << std::endl;

  return 0;
}

着想は単純で、二進数を数値に変換する処理をなぞって、二倍する処理と、一増やす処理を10進数値配列に対して適用できるようにしたもの。

書き直すとよい、あるいは改善できる点。:

main 関数の引数で 16 進文字列を受け取って、それぞれの 10 進数値を出力できるようにする
convert 関数で、上位桁で連続する 0 を消去する
convert 関数の、ビット列を表現するために必要な桁数を見積もりかた
あるいは dbl と inc の両関数で、桁が必要なだけ増やすようにする

コンパイル・リンク、実行結果はこんな感じ。:

$ make hex2dec CXXFLAGS="-O2 -Wall" && ./hex2dec 
g++ -O2 -Wall    hex2dec.cc   -o hex2dec
0004294967296

書き直すと良い点を反映してみた

自分でつくったお題ながら楽しくてかないません。アホだなあ…

#include <vector>
#include <iostream>
#include <iterator>
#include <cstring>

typedef std::vector<bool> bits;
typedef std::vector<int> nums;

template<typename T, size_t N> inline T* end_(T (&a)[N]) { return a + N; }

static bits from(const char *const begin, const char *const end) {
  bits bs;
  bs.push_back(false);
  for (const char *p = begin; p != end; ++p) {
    static const char hex[] = "0123456789ABCDEFabcdef";
    size_t d = std::distance(hex, std::find(hex, end_(hex), *p));
    if (22 < d) continue;
    if (16 <= d && d < 22) d -= 6;
    bs.push_back(d & 0x08);
    bs.push_back(d & 0x04);
    bs.push_back(d & 0x02);
    bs.push_back(d & 0x01);
  }
  return bs;
}

static nums carry(nums ns) {
  for(nums::iterator it = ns.begin(); it != ns.end(); ++it) {
    if (10 <= *it) {
      *it -= 10;
      nums::iterator n = it;
      if (++n != ns.end()) *n += 1; else { ns.push_back(1); break; }
    }
  }
  return ns;
}

static nums dbl(nums ns) {
  for(nums::iterator it = ns.begin(); it != ns.end(); ++it)
    *it *= 2;
  return carry(ns);
}

static nums inc(nums ns) {
  return ns.empty() ? nums(1,1) : (++ns.front(), carry(ns));
}

static nums convert(bits bs) {
  nums ns;
  for(bits::iterator it = bs.begin(); it != bs.end(); ++it)
    ns = *it ? inc(dbl(ns)) : dbl(ns);
  return ns;
}

int main(int argn, char **args) {
  while((++args, --argn)) {
    nums ns = convert(from(*args, *args + strlen(*args)));
    std::copy(ns.rbegin(), ns.rend(), std::ostream_iterator<int>(std::cout));
    std::cout << std::endl;
  }
  return 0;
}

コマンドラインの文字列から値を読んで出力します。:

$ make hex2dec CXXFLAGS="-O2 -Wall" &&\
 ./hex2dec 1 10 100 1000 10000 100000 1000000\
 10000000 100000000 1000000000 10000000000
g++ -O2 -Wall    hex2dec.cc   -o hex2dec
1
16
256
4096
65536
1048576
16777216
268435456
4294967296
68719476736
1099511627776

カッとなって STL アルゴリズムを導入した

ただもうひたすら趣味の世界。

#include <vector>
#include <iostream>
#include <iterator>
#include <cstring>
#include <algorithm>
#include <numeric>

typedef std::vector<bool> bits;
typedef std::vector<int> nums;

template<typename T, size_t N> inline T* end_(T (&a)[N]) { return a + N; }

static bits from(const char *const begin, const char *const end) {
  bits bs;
  bs.push_back(false);
  for (const char *p = begin; p != end; ++p) {
    static const char hex[] = "0123456789ABCDEFabcdef";
    size_t d = std::distance(hex, std::find(hex, end_(hex), *p));
    if (22 < d) continue;
    if (16 <= d && d < 22) d -= 6;
    bs.push_back(d & 0x08);
    bs.push_back(d & 0x04);
    bs.push_back(d & 0x02);
    bs.push_back(d & 0x01);
  }
  return bs;
}

static nums carry(nums ns) {
  for(nums::iterator it = ns.begin(); it != ns.end(); ++it) {
    if (10 <= *it) {
      *it -= 10;
      nums::iterator n = it;
      if (++n != ns.end()) *n += 1; else { ns.push_back(1); break; }
    }
  }
  return ns;
}

static void dbl_aux(int &v) { v *= 2; }
static nums dbl(nums ns)
{ return std::for_each(ns.begin(), ns.end(), dbl_aux), carry(ns); }

static nums inc(nums ns)
{ return ns.empty() ? nums(1,1) : (++ns.front(), carry(ns)); }

static nums cnv_aux(nums ns, bool v) { return v ? inc(dbl(ns)) : dbl(ns); }
static nums convert(bits bs)
{ return std::accumulate(bs.begin(), bs.end(), nums(), cnv_aux); }

static inline void print(const nums &ns) {
  std::copy(ns.rbegin(), ns.rend(), std::ostream_iterator<int>(std::cout));
  std::cout << std::endl;
}

int main(int argn, char **args) {
  while((++args, --argn)) print( convert(from(*args, *args + strlen(*args))) );
  return 0;
}

dbl_aux や cnv_aux のような小さな関数を、アルゴリズムに渡すために名前をつけてファイルスコープで定義しないとならないのが不便。このあたり、 C++0x の lambda に過剰な期待を抱いてしまうんだけど…使いにくかろうなあやっぱり。

carry 関数を書き直してみた

ループの中で常に次の要素の有無を調べて、なければ（最後の桁上がりを追加するために）ループをまわすイテレーターを破壊する ns.push_back を呼び出して break するという処理が醜悪におもえてしかたなかったからである。
また、加算での桁上がりは 1 ずつに過ぎないからとさぼっていた桁上がりの処理を、あるべきように書き換えた。

typedef std::pair<nums,int> carry_t;
static inline nums cons(nums ns, int value) { ns.push_back(value); return ns; }
static inline carry_t carry_aux(carry_t pair, int v) {
  // v には次の桁の値が、ペアの二番目の要素には桁上がりの値が入っている
  const int t = pair.second + v;
  return std::make_pair(cons(pair.first, t % 10), t / 10);
}
static nums carry(nums ns) {
  carry_t pair = std::accumulate(ns.begin(), ns.end(), carry_t(), carry_aux);
  return pair.second ? cons(pair.first, pair.second) : pair.first;
}

ループを accumulate で置き換えて、ループを抜けてから carry の有無を調べて必要に応じて最上位桁を追加するようにした。
このように整理した後でも、 carry_aux 関数と carry 関数とに、構造が非常ににかよった cons があるところが気に入らない。