#!/bin/bash
#
# Copyright (C) 2002 Graham Williams
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of
# the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# or email the author of this code for a copy.

#
# shortmsg: print a one-line pointer to --help on standard error.
#
# The script name is passed as printf data (never as part of the format)
# so a '%' or backslash in $0 cannot corrupt the message, and '>&2' is
# used instead of '>/dev/stderr' because re-opening /dev/stderr can
# truncate a log file that stderr has been redirected to.
#
shortmsg ()
{
    printf "Try \`%s --help' for more information.\n" "$0" >&2
}

#
# usage: print the option synopsis on standard error.
#
usage ()
{
    cat >&2 <<EOF
Usage: $0 [options] datafile > plot.pdf
  -h, --help                     Usage plus extra help
  -b, --bins=n                   Number of bins [10]
  -t, --title=title              Top title
  -T, --type=[pdf,eps,fig,png]   Output type to generate
EOF
}

#
# help: print the long description on standard error (shown after
# usage when -h/--help is given).
#
help ()
{
    cat >&2 <<'EOF'

Generates a binned bar chart of the supplied data. Input data is
assumed numeric single column. The graphic output is written to
standard out.

The bar chart plots the frequency distribution of the evenly binned
data. The binning divides the numeric range (min value to max value)
into bin size (default 10) intervals of the same numeric width.

If labels or any other entity is poorly placed then generate FIG using
`-T fig' and modify with xfig then export to PDF.
EOF
}

#
# Default options: There's lots more we could parameterise!
#
bin_count=10      # number of bins (-b, --bins)
typeout="pdf"     # output type (-T, --type): pdf, eps, fig or png
given_title=""    # title text supplied with -t, --title
use_title=1       # 1: generate the default title; 0: use given_title

#
# Handle command line: getopt reads the command line, checks for errors
# then generates a reformatted, reordered and well formed command line.
# Thus the user can make the command line as ugly as they like :-)
#
# Each long option in --longoptions must be separated by a comma; a
# trailing ':' marks an option that takes a required argument.
#
command_line=$(getopt --options b:ht:T: \
                      --longoptions bins:,help,title:,type: \
                      --alternative --name "$0" -- "$@") || exit 1

#
# Replace old command line with newly parsed command line.  getopt
# emits shell-quoted words, so eval is needed to re-split them.
#
eval set -- "$command_line"

#
# Now process all arguments
#
while true ; do
    case "$1" in
        -h|--help)
            usage
            help
            exit 0
            ;;
        -b|--bins)
            bin_count=$2
            shift 2
            ;;
        -t|--title)
            use_title=0
            given_title=$2
            shift 2
            ;;
        -T|--type)
            typeout=$2
            shift 2
            ;;
        --)
            shift
            break
            ;;
        *)
            echo "$0: Internal error 1!" >&2
            exit 1
            ;;
    esac
done

#
# Check supplied arguments.  User-supplied text is always passed to
# printf as data, never as part of the format string.
#
case "${typeout}" in
    pdf|eps|fig|png)
        ;;
    *)
        printf "%s: allowed types are [pdf,eps,fig,png], not: %s\n" \
               "$0" "${typeout}" >&2
        shortmsg
        exit 1
        ;;
esac

# The bin count is used as a divisor when computing the bin width, so
# it must be a strictly positive whole number.
if [[ ! "${bin_count}" =~ ^0*[1-9][0-9]*$ ]]; then
    printf "%s: number of bins must be a positive integer, not: %s\n" \
           "$0" "${bin_count}" >&2
    shortmsg
    exit 1
fi

if [[ $# -ne 1 ]]; then
    printf "%s: expecting one data file but got %d: %s\n" \
           "$0" "$#" "$*" >&2
    shortmsg
    exit 1
fi

#
# Record the data file name and check it exists
#
datafile=$1

if [[ ! -f "${datafile}" ]]; then
    printf "%s: data file '%s' not found\n" "$0" "${datafile}" >&2
    exit 1
fi

#
# Smallest and largest value (numeric sort) and the number of lines.
# The file is read through a redirection so that a name starting with
# '-' cannot be mistaken for an option.
#
min_value=$(sort -n <"${datafile}" | head -n 1)
max_value=$(sort -n <"${datafile}" | tail -n 1)
data_size=$(wc -l <"${datafile}")
data_size=${data_size//[[:space:]]/}    # some wc implementations pad the count

if [[ -z "${min_value}" || -z "${max_value}" ]]; then
    printf "%s: data file '%s' is empty or contains blank lines\n" \
           "$0" "${datafile}" >&2
    exit 1
fi

# If the column contains integers 1 - 7. If I select 7 bins the
# bins do not align with the integers! This is correct because
# there are only 6 bins between 1 and 7. If I want the bins
# aligned with the integers I would choose 6 bins! But then
# again????? So I added 1.0 in the equation below. I have not
# thought through the consequences on continuous data!!!!!! Looks
# okay though.
#
# Note that dc requires -3 to be input as _3
#
# dc_num VALUE: print VALUE with a leading minus sign rewritten as the
# underscore dc uses for negative literals.  Converting each operand
# separately means every negative value is handled (not just the first
# one in the expression) and dc's '-' operator is never touched.
#
dc_num ()
{
    printf '%s' "${1/#-/_}"
}

#
# Bin width: (1 + max - min) / bin_count, to two decimal places.
#
bin_size=$(dc -e "2 k 1 $(dc_num "${max_value}") + $(dc_num "${min_value}") - ${bin_count} / p")

#
# Plot title
#
plot_title="Frequency distribution of "
if [[ "${use_title}" == 0 ]]; then
    # A title was supplied on the command line.
    plot_title=${given_title}
else
    plot_title="${plot_title} ${datafile} into ${bin_count} bins (n=${data_size})"
fi

# The title is placed inside a double-quoted gnuplot string, so escape
# backslashes and double quotes.
gnuplot_title=${plot_title//\\/\\\\}
gnuplot_title=${gnuplot_title//\"/\\\"}

# Boxes are 0.4 of a bin wide; the x range is padded by one bin on each side.
box_width=$(dc -e "2 k 0.4 ${bin_size} * p")
x_low=$(dc -e "$(dc_num "${min_value}") ${bin_size} - p")
x_high=$(dc -e "$(dc_num "${max_value}") ${bin_size} + p")

{
    #
    # Get ready to chart
    #
    # NOTE(review): the bare line type in "with boxes 2" is kept as in
    # the original; the rectangle filter further down matches the FIG
    # output it produces.
    #
    printf 'set title "%s"\n' "${gnuplot_title}"
    printf 'set terminal fig color\n'
    printf "set xlabel '%s'\n" "Bins"
    printf "set ylabel '%s'\n" "Frequency"
    printf 'set boxwidth %0.2f\n' "${box_width}"
    printf "plot [%s:%s] [0:] '-' notitle with boxes 2\n" "${x_low}" "${x_high}"

    #
    # Generate the data to be bar charted: one "bin-start count" line
    # for every bin that received at least one value.
    #
    awk -v BINSIZE="${bin_size}" -v MINVALUE="${min_value}" '
        {
            bin_index = int(($1 - MINVALUE) / BINSIZE)
            bin_count[bin_index]++
        }
        END {
            for (i in bin_count)
                printf("%.2f %d\n", MINVALUE + (i * BINSIZE), bin_count[i])
        }' <"${datafile}"

    #
    # End of data
    #
    printf 'e\n'
} \
    | gnuplot \
    | awk '
        #
        # Convert rectangles with line style 1 to filled rectangles.
        #
        /2 1 0 1 1 1 10 0 -1 0.000 0 0 0 0 0 5/ {
            print "2 1 0 1 -1 -1 10 0 5 0.000 0 0 0 0 0 5"
            next
        }
        { print }
      ' \
    | case "${typeout}" in
          fig)
              cat
              ;;
          eps)
              # -n sets the title recorded in the generated file; the
              # plot title is used because no other name is defined.
              fig2dev -L eps -n "${plot_title}"
              ;;
          pdf)
              fig2dev -L pdf -n "${plot_title}"
              ;;
          png)
              fig2dev -L png -m 2 \
                  | convert -transparent white - -
              ;;
          *)
              echo "$0: Internal error 2!" >&2
              exit 1
              ;;
      esac