#! /usr/bin/perl
#
# Read a list of chunk label files and, for every sentence in them, write a
# time-shifted copy of the matching sentence label file(s) with an added
# SHADOW tier (*.shadow)
#
# use:
#> SplitChunks.pl pattern
#
###############################################################################
#
# Copyright R.J.J.H. van Son © 2000, 2001
#
# Author Rob van Son
# Institute of Phonetic Sciences & ACLC
# University of Amsterdam
# Herengracht 338
# NL-1016CG Amsterdam, The Netherlands
# Email: Rob.van.Son@hum.uva.nl
#        rob.van.son@workmail.com
# WWW  : http://www.fon.hum.uva.nl/rob/
# mail:  Institute of Phonetic Sciences
#        University of Amsterdam
#        Herengracht 338
#        NL-1016CG Amsterdam
#        The Netherlands
#        tel +31 205252183
#        fax +31 205252197
#
# License for use and disclaimers
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#
#
#######################################################
#
# Define and initialize
# $HomeDir is deliberately a package global: it is interpolated into the
# require path below (and is presumably also read by Links.pl -- TODO confirm).
$HomeDir = '../../..';
require "$HomeDir/Links.pl";
# $Scripts (and $SentenceLabels, used further down) are not set in this file;
# presumably Links.pl defines them -- verify against Links.pl.
require "$Scripts/SentenceLabel.pl";

# NOTE(review): $LabelDir is not read anywhere in this script; it is kept
# because code loaded via require might depend on it.
$LabelDir = "ChunkLabels";
$LabelDestination = "$HomeDir/home/Shadowing";

# Every command line argument is treated as a glob pattern
my @GlobFileList = @ARGV;
foreach my $FilePattern (@GlobFileList)
{
    # Get the real filenames (expand * and ?)
    my @FileList = glob("$FilePattern");

    # Handle all files
    FILE:
    foreach my $FileName (@FileList)
    {
        print "$FileName\n";

        # Take the code name from the file path: the last path component
        # without its extension,
        # e.g., "./Labels/chunks/F28G/F28G1FPA1.translit" -> "F28G1FPA1".
        # The match is done in list context so that a failed match yields
        # undef instead of a stale $1 left over from an earlier match.
        my ($Code) = $FileName =~ m@/?([^/\.]+)\.([\w]+)$@i;
        unless (defined $Code)
        {
            warn "Skipping '$FileName': cannot extract a code name from the file name\n";
            next FILE;
        }

        # Tear apart the code name: the leading [FM]<digits><capitals> part
        # is the speaker ID, e.g., "F28G1FPA1" -> "F28G1FPA"
        my ($SpeakerID) = $Code =~ /^([FM][\d]+[A-Z]+)/;
        unless (defined $SpeakerID)
        {
            warn "Skipping '$FileName': no speaker ID found in '$Code'\n";
            next FILE;
        }

        # Read the chunk label file and walk over its TRANSLIT items; the
        # loop runs for as long as nextItem returns a value above -1
        my $LabelFile = SentenceLabel->new;
        $LabelFile->ReadLabelFile($FileName);
        $LabelFile->presetItem('TRANSLIT');
        SENTENCE:
        while ($LabelFile->nextItem('TRANSLIT') > -1)
        {
            # Skip pauses
            next SENTENCE if $LabelFile->currentValue('TRANSLIT') eq 'ISI*x';

            my $SentenceName = $LabelFile->currentIDcode('TRANSLIT');
            my $ShadowFileName = "$LabelDestination/$SpeakerID/sentences/$SentenceName.shadow";
            # Skip existing files
            next SENTENCE if -e $ShadowFileName;

            # Length of the current item plus 2*2 seconds margin
            my $SentenceFileLength = $LabelFile->currentEnd('TRANSLIT')
                                   - $LabelFile->currentStart('TRANSLIT') + 4;

            # Get the ID of the label file that corresponds to the current
            # "sentence": everything after the first '+' in the sentence name.
            # Captured explicitly (instead of reading $') so that a name
            # without a '+' is detected rather than silently replaced by the
            # leftovers of an earlier match.
            my ($OrigLabelFileID) = $SentenceName =~ /^[FM]\d+[A-Z]+[^\+]+\+(.*)\z/s;
            unless (defined $OrigLabelFileID)
            {
                warn "Skipping sentence '$SentenceName' in '$FileName': no '+<original ID>' part\n";
                next SENTENCE;
            }
            my ($OrigSpeaker) = $OrigLabelFileID =~ /^([FM]\d+[A-Z]+)/;
            unless (defined $OrigSpeaker)
            {
                warn "Skipping sentence '$SentenceName' in '$FileName': no speaker ID in '$OrigLabelFileID'\n";
                next SENTENCE;
            }

            # Get the original label file(s)
            my @OrigLabelFiles = glob("$SentenceLabels/$OrigSpeaker/phoneme/$OrigLabelFileID*.phoneme");
            # Skip unless there exists a label file
            next SENTENCE unless @OrigLabelFiles;

            # Process these label files.
            # NOTE(review): every file found is written to the same
            # $ShadowFileName, so with several matches the last one printed
            # determines the file contents -- confirm this is intended.
            foreach my $CurrentLabelFileName (@OrigLabelFiles)
            {
                my $CurrentLabel = SentenceLabel->new;
                $CurrentLabel->ReadLabelFile($CurrentLabelFileName);
                print "$CurrentLabelFileName\n";

                # Shift the whole file by 2 minus the end time of the first
                # TRANSLIT item (2 seconds margin)
                $CurrentLabel->firstItem('TRANSLIT');
                my $Shift = 2 - $CurrentLabel->currentEnd('TRANSLIT');
                $CurrentLabel->shiftSentenceLabel($Shift, $SentenceFileLength);

                # Delay: duration of the last TRANSLIT item (after the
                # shift) minus 2
                $CurrentLabel->lastItem('TRANSLIT');
                my $Delay = $CurrentLabel->currentEnd('TRANSLIT')
                          - $CurrentLabel->currentStart('TRANSLIT') - 2;

                # Add shadow phoneme tier: a copy of the PHONEMES tier
                # shifted by the delay
                $CurrentLabel->copyTier('SHADOW', 'PHONEMES');
                $CurrentLabel->shiftTier('SHADOW', $Delay, $SentenceFileLength);

                # Output this label to file
                $CurrentLabel->FoldedIDcodes('SENTENCE', 'TRANSLIT', 'WORDS', 'CGN', 'LEXCGN',
                    'SYLLABLES', 'LEXSYLL', 'SYLLPARTS', 'PHONEMES', 'SHADOW');

                $CurrentLabel->print($ShadowFileName,
                    'SENTENCE', 'TRANSLIT', 'WORDS', 'CGN', 'LEXCGN',
                    'SYLLABLES', 'LEXSYLL', 'SYLLPARTS', 'PHONEMES', 'SHADOW');
            }
        }
    }
}
