#!/bin/bash
# Michal Trs
#
# 36NAN - strom telecom data
# Converts the output of preprocess.sh to ARFF (Weka format).
# Input: 5 space-separated columns on stdin or in a file given as argument
#  1 - day of week (E):        0 - 6
#  2 - hour of day (E):        0 - 23
#  3 - incoming exchange (K):  numbered from 0
#  4 - outgoing exchange (L):  numbered from 0
#  5 - termination cause (M):  3 (success) -> 1, others (failure) -> 0
#
#  Output: written to the file telecom.arff (override with -o <file>)
#

# Print the opening part of an ARFF nominal attribute declaration.
#   $1 - attribute name
# Writes "@attribute '<name>' {" to stdout WITHOUT a trailing newline, so
# that the list of values can be appended on the same line.
function p1 {
  printf "@attribute '%s' {" "$1"
}

# Print the value list that closes an ARFF nominal attribute declaration.
#   $@ - the attribute values, one per argument
# Writes the values joined by commas, followed by "}" and a newline, to
# stdout. With no arguments only "}" is printed.
#
# Every argument is emitted verbatim: it is neither word-split again nor
# glob-expanded, so values such as "*" or "?" stay literal. No variable of
# the caller is modified.
function p2 {
  # "$*" joins the positional parameters with the first character of IFS;
  # IFS is changed only locally for the duration of this function.
  local IFS=,
  printf '%s}\n' "$*"
}

# Print the ARFF header for a data file to stdout.
#   $1 - path of the input file; fields are separated by single spaces and
#        the first five columns are used
# Output: the "@relation" line, one nominal "@attribute" declaration per
# column (day, hour, dir1, dir2, status) listing the distinct values found
# in that column in ascending numeric order, and finally the "@data" line.
#
# The file is read once per column, so it must be a regular, re-readable
# file (not a pipe). Relies on p1 and p2 to format each declaration.
function header {
  local -a names=(day hour dir1 dir2 status)
  local -a values
  local col line

  echo "@relation 'strom telecom data'"
  for col in 1 2 3 4 5; do
    # Collect the distinct values of this column into an array, one element
    # per value, so they are passed on without word splitting or globbing.
    values=()
    while IFS= read -r line; do
      # Rows that are too short yield empty fields; they are not values.
      [[ -n $line ]] && values+=("$line")
    done < <(cut -d' ' -f"$col" -- "$1" | sort -n -u)

    p1 "${names[col - 1]}"
    p2 "${values[@]}"
  done
  echo "@data"
}

# Default output file; can be overridden with -o <file>.
outfile="telecom.arff"

# Option parsing: -o <file> selects the output file. On an unknown option
# or a missing option argument getopts prints a diagnostic and the script
# exits with status 2.
while getopts o: volba; do
  case "$volba" in
    o)  outfile=$OPTARG ;;
    \?) exit 2 ;;
  esac
done

shift $((OPTIND - 1))

if [[ $# -eq 0 ]]; then
  # No input file given: the script is used in a pipe. header has to read
  # the data several times, so stdin is first spooled into a private
  # temporary file that is removed again whenever the script exits.
  infile=$(mktemp) || exit 1
  trap 'rm -f -- "$infile"' EXIT
  cat > "$infile"
else
  infile=$1
  if [[ ! -r $infile ]]; then
    printf '%s: cannot read input file: %s\n' "${0##*/}" "$infile" >&2
    exit 1
  fi
fi

# Write the ARFF header followed by the data rows. ARFF separates the values
# of an instance with commas; assigning $1 to itself makes awk rebuild the
# record with OFS, turning every single-space separator into a comma.
{
  header "$infile"
  awk -F'[ ]' -v OFS=',' '{ $1 = $1; print }' < "$infile"
} > "$outfile"


  

