Python: ターミナル上でヒストグラムを可視化する
何本目の車輪か知らないが、ターミナル上でヒストグラムを可視化するスクリプトを再発明。
標準入力から1行に1つずつ数値(整数または小数)を与えると、値の範囲を10個の区間に等分し、
区間ごとの出現頻度を「*」の数で可視化する。
1個または複数のファイルを指定して読み取ることも可能。(複数ファイルの場合は合算)
アクセスログや各種ログに含まれる数値の分布を即座に判断しなければならない場合などに使用している。
継続的に見るデータであれば、GrowthForecast などのツールを使った方がいいと思う。
コード
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
""" | |
print_histo.py | |
Visualize histogram as text. | |
""" | |
import sys | |
import numpy | |
import exceptions | |
class State(object):
    """Accumulate numeric samples and render them as a text histogram.

    Instance attributes (all set in ``__init__``):
      limit     -- maximum number of samples ``parse`` will accept.
      bar_width -- number of ``*`` characters drawn for the fullest bin.
      count     -- number of samples stored so far.
      xs        -- the stored samples, as floats.
    """

    def __init__(self, limit=100000, bar_width=48):
        self.limit = limit
        self.bar_width = bar_width
        self.count = 0
        self.xs = []

    def parse(self, s):
        """Convert one input line to a float and store it.

        Lines that are not numbers are silently ignored, and so are
        non-finite values ("nan", "inf"): numpy.histogram cannot derive a
        bin range from them and would fail when the histogram is printed.

        Raises:
            RuntimeError: if ``limit`` samples are already stored. The check
                runs before parsing, so it fires for any further line,
                numeric or not.
        """
        if self.limit <= self.count:
            raise RuntimeError("Number of data is reached to limit: %d" % self.limit)
        try:
            # Builtin ValueError: no dependency on the Python 2-only
            # ``exceptions`` module.
            value = float(s)
        except ValueError:
            # Ignore input.
            return
        if not numpy.isfinite(value):
            return
        self.xs.append(value)
        self.count += 1

    def __str__(self):
        """Return the histogram as text, one line per bin.

        Each line is ``[low, high): <bar> <count>``; the last bin is closed
        on the right (``]``). Bin edges and counts come from
        numpy.histogram with its default settings.
        """
        freqs, bins = numpy.histogram(self.xs)

        # Labels: right-align every edge to the widest one so columns line up.
        edge_texts = ['%.3f' % b for b in bins]
        edge_width = max(len(text) for text in edge_texts)
        edge_texts = [text.rjust(edge_width) for text in edge_texts]
        labels = ['[%s, %s)' % pair
                  for pair in zip(edge_texts[:-1], edge_texts[1:])]
        labels[-1] = labels[-1][:-1] + ']'

        # Bars: scale so the fullest bin gets bar_width stars. Floor division
        # keeps the length an integer under true division as well; any
        # non-empty bin gets at least one star.
        peak = int(max(freqs))
        bars = []
        for freq in freqs:
            freq = int(freq)
            if freq == 0:
                length = 0
            else:
                length = max(1, int(freq * self.bar_width // peak))
            bars.append('*' * length)
        longest_bar = max(len(bar) for bar in bars)
        bars = [bar.ljust(longest_bar) for bar in bars]

        # Assemble label, bar and raw count into one row per bin.
        rows = ('%s: %s %6d' % row for row in zip(labels, bars, freqs))
        return '\n'.join(rows)
def process(function):
    """Call ``function`` once per input line, with the trailing newline removed.

    Input comes from the files named in ``sys.argv[1:]``, read in order, or
    from standard input when no file is named.

    Ctrl-C or EOFError while reading one source ends that source quietly.
    Any other exception (including one raised by ``function``) is reported
    as ``<program>: <ExceptionType>: <message>`` and stops processing of the
    remaining sources.
    """
    # No path arguments -> [None], which stands for stdin;
    # otherwise -> sys.argv[1:].
    paths = (sys.argv + [None])[1:max(2, len(sys.argv))]
    for path in paths:
        fp = None
        try:
            fp = sys.stdin if path is None else open(path)
            for line in fp:
                function(line.rstrip("\n"))
        except (KeyboardInterrupt, EOFError):
            pass
        except Exception:
            exc_type, exc = sys.exc_info()[:2]
            print('%s: %s: %s' % (sys.argv[0], exc_type.__name__, exc))
            return
        finally:
            # Close the files this function opened; stdin is left open.
            if fp is not None and fp is not sys.stdin:
                fp.close()
if __name__ == '__main__':
    # Script entry point: gather the samples from the command-line files
    # (or stdin), then print the resulting histogram.
    histogram = State()
    process(histogram.parse)
    print(histogram)
実行例
$ (for i in {1..10}; do echo $RANDOM; done) | ./print_histo.py
[  458.000,  3654.300): ************************                              1
[ 3654.300,  6850.600): ************************************************      2
[ 6850.600, 10046.900):                                                       0
[10046.900, 13243.200): ************************************************      2
[13243.200, 16439.500): ************************                              1
[16439.500, 19635.800):                                                       0
[19635.800, 22832.100): ************************                              1
[22832.100, 26028.400):                                                       0
[26028.400, 29224.700): ************************************************      2
[29224.700, 32421.000]: ************************                              1
$ (for i in {1..10000}; do echo $RANDOM; done) | ./print_histo.py
[    1.000,  3277.600): ********************************************        975
[ 3277.600,  6554.200): *********************************************      1002
[ 6554.200,  9830.800): *******************************************         949
[ 9830.800, 13107.400): ************************************************   1058
[13107.400, 16384.000): *********************************************      1011
[16384.000, 19660.600): **********************************************     1022
[19660.600, 22937.200): ********************************************        988
[22937.200, 26213.800): **********************************************     1016
[26213.800, 29490.400): **********************************************     1016
[29490.400, 32767.000]: *******************************************         963
$