#! /usr/bin/perl
#
# Read raw Russian vowel data (tab-separated rows) and convert it to the
# attribute-table format written to STDOUT.
#
# Input is read from STDIN or from the files named on the command line.

# Emit the attribute declarations that head the output: one
# "#> name [TYPE] label" line per column, closed by the
# "# Intas915RussianVowels" line.  The single-quoted terminator keeps the
# text literal (nothing is interpolated).
# NOTE(review): the OnsetFreq line has two spaces before "[FLOAT]"; it is
# reproduced exactly as found -- confirm whether the consumer cares.
my $attribute_header = <<'ENDOFATTRIBUTELIST';
#> id [TEXT] Id
#> Speaker [TEXT] Speaker
#> Sex [TEXT] Sex
#> SpeechType [TEXT] SpeechType
#> Index [INT] Index
#> File [TEXT] File
#> Acoustic [TEXT] Acoustic
#> VowelSimple [TEXT] VowelSimple
#> Perceptual [TEXT] Perceptual
#> Ideal [TEXT] Ideal
#> Syllable [TEXT] Syllable
#> SylDuration [FLOAT] SylDuration
#> Prev_Context [TEXT] Prev_Context
#> Following_Context [TEXT] Following_Context
#> Stress [TEXT] Stress
#> Duration [FLOAT] Duration
#> FI [FLOAT] FI
#> FI_st [FLOAT] FI_st
#> FII [FLOAT] FII
#> FII_st [FLOAT] FII_st
#> CoG [FLOAT] CoG
#> CoG_st [FLOAT] CoG_st
#> Int [FLOAT] Int
#> Word [TEXT] Word
#> WordFreq [FLOAT] WordFreq
#> Onset [TEXT] Onset
#> OnsetFreq  [FLOAT] OnsetFreq
#
# Intas915RussianVowels
ENDOFATTRIBUTELIST

print $attribute_header;


use strict;
use warnings;

# Convert one tab-separated input row into the list of output fields.
#
# Input column order:  Sex, SpeechType, Index, File, Speaker, rest...
# Output column order: id, Speaker, Sex, SpeechType, Index, File, rest...
# where id is File immediately followed by Index.
#
# Parameters: $line - one input row, without its line terminator.
# Returns:    the output fields as a list, or the empty list when the row
#             has fewer than five columns (no Speaker column to move).
sub _convert_record {
    my ($line) = @_;
    my $min_columns = 5;

    # Decimal commas become decimal points ("1,5" -> "1.5").  This is
    # applied to the whole row, so any digit,digit pair is affected.
    $line =~ s/(\d),(\d)/$1.$2/g;

    # Note: split drops trailing empty fields.
    my @fields = split /\t/, $line;
    return if @fields < $min_columns;

    # Sex and SpeechType are written in capitals.
    @fields[0, 1] = map { uc } @fields[0, 1];

    my $id = $fields[3] . $fields[2];

    # Speaker moves to the front (behind the id); the remaining columns
    # keep their relative order.
    return ($id, @fields[4, 0 .. 3, 5 .. $#fields]);
}

# Main loop: read every row from STDIN or the files given as arguments,
# convert it, and print it tab-separated on STDOUT.
LINE:
while (my $line = <>) {
    chomp $line;

    # Only rows containing a digit are treated as data; this also drops
    # blank lines and digit-free heading lines.
    next LINE unless $line =~ /\d/;

    my @record = _convert_record($line);
    if (!@record) {
        # No trailing newline: warn appends the script and input line number.
        warn "Skipping row with fewer than 5 columns";
        next LINE;
    }

    print join("\t", @record), "\n";
}
