#! /usr/bin/perl
#
# Special purpose program for Unix/Linux:
# Determine the order number of word use (Given or New).
#
# Construct a Database table.
#
# Use:
# ./ConstructGivenNew.pl
#
# This will do a select for words and sentence sortnumbers.
# These are used to count word occurrences.
#
#
###############################################################################
#
# Copyright R.J.J.H. van Son © 2000, 2001
#
# Author Rob van Son
# Institute of Phonetic Sciences & ACLC
# University of Amsterdam
# Herengracht 338
# NL-1016CG Amsterdam, The Netherlands
# Email: Rob.van.Son@hum.uva.nl
#        rob.van.son@workmail.com
# WWW  : http://www.fon.hum.uva.nl/rob/
# mail:  Institute of Phonetic Sciences
#        University of Amsterdam
#        Herengracht 338
#        NL-1016CG Amsterdam
#        The Netherlands
#        tel +31 205252183
#        fax +31 205252197
#
# License for use and disclaimers
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#
#
#######################################################
#
# Root of the project tree. The path is relative, so it is resolved against
# the directory the script is started from (see "Use:" in the header).
$HomeDir = '../../..';
# Project-local setup files, loaded for their side effects.
# NOTE(review): $DatabaseTables (used below) and $conn (used to run the
# query) are not assigned anywhere in this script; presumably Links.pl and
# connectDBD.pl define them -- confirm against those files.
require "$HomeDir/Links.pl";
require "$HomeDir/connectDBD.pl";

#
# Where are the files
# NOTE(review): $TypeExtension is not referenced again in the visible part of
# this script -- TODO confirm whether it is still needed.
my $TypeExtension = 'given';
# Directory that receives the generated GivenNewWordsTable.txt.
my $TableDirectory = "$DatabaseTables/PhInfo";

# Create the output table (truncating any previous version) and write its
# header section.
#
# The Column Headers list will add the corresponding column headers: one
# "#> Name [TYPE] description" line per column, in the same order as the
# tab-separated fields of the data rows written further down.
#
# The three-argument form of open is used so that the path is taken
# literally: it is never scanned for mode characters and surrounding
# whitespace is not stripped.
# Dies when the table file cannot be created.
my $TablePath = "$TableDirectory/GivenNewWordsTable.txt";
open(DATABASE, '>', $TablePath)
    or die "Database >$TablePath: $!\n";
print DATABASE << "HEADEREND";
\#> ID [TEXT] IDcode of word
\#> Speaker [TEXT] Speaker ID
\#> TextType [CHAR(1)] Type of text (Fixed/Variable)
\#> Style [CHAR(4)] Speaking style
\#> word [TEXT] Word text
\#> RecNo [INT2] Number of recording session
\#> OrderNo [INT4] Unique Order number for sorting sentences
\#> Sentencepos [TEXT] Position in the sentence
\#> Value [INT4] Occurrence number in this style&recording
\#
\# $TablePath
HEADEREND

# Print the copyright license
#
# Copies ./licenseterms.txt line by line into the table. When that file
# cannot be opened the table is simply written without license text; no
# error is raised.
if(open(my $LicenseFile, '<', './licenseterms.txt'))
{
	# Four-digit current year, taken from the broken-down time instead of
	# being parsed out of the localtime() display string.
	my $Year = (localtime())[5] + 1900;
	while(my $Line = <$LicenseFile>)
	{
		# A copyright line that does not mention the current year yet gets
		# ", <year>" appended after the last digit. The text replaces exactly
		# six whitespace characters, so the remaining whitespace and the
		# closing '#' keep their column.
		if($Line =~ /\©/ && $Line !~ /$Year/)
		{
			# $1/$2 (not \1/\2) are the capture variables on the
			# replacement side of a substitution.
			$Line =~ s/(\d)\s{6}(\s+\#)/$1, $Year$2/g;
		};
		print DATABASE $Line;
	};
	close($LicenseFile)
};

#
# Construct the SELECT statement.
#
# Every word row is combined with the sentence row it refers to
# (wordsphoneme.sentenceid = translitphoneme.id) to pick up the order number
# of the sentence. Rows are returned sorted by speaker, text type, style,
# sentence order number and position in the sentence.
#
# NOTE(review): the WHERE clause used to contain the join condition twice,
# the second time with its operands swapped. The duplicate is dropped here;
# if a different second condition was intended, it has to be added.
#
# The statement carries no terminating ';' of its own: the prepare() call
# below appends one.
my $SELECT =<< "SELECTENDSTATEMENT";
SELECT
	wordsphoneme.id as id,
	wordsphoneme.speaker as speaker,
	wordsphoneme.texttype as texttype,
	wordsphoneme.style as style,
	wordsphoneme.value as word,
	wordsphoneme.recnr as recnr,
	translitphoneme.ordernbr as ordernbr,
	wordsphoneme.sentencepos as sentencepos
FROM
	wordsphoneme, translitphoneme
WHERE
	wordsphoneme.sentenceid = translitphoneme.id
ORDER BY
	wordsphoneme.speaker, wordsphoneme.texttype,
	wordsphoneme.style, translitphoneme.ordernbr,
	wordsphoneme.sentencepos
SELECTENDSTATEMENT

# Column names in the order in which the row loop unpacks them. They are
# looked up in the statement handle's lower-cased name map, so they have to
# match the "as" aliases of the SELECT statement.
my @AttributeNames = qw(id speaker texttype style word recnr ordernbr sentencepos);

# Execute the query
# The rows are only fetched when prepare() actually returned a handle.
$sth = $conn->prepare("$SELECT;");
$result = $sth ? $conn->selectall_arrayref($sth) : undef;

# Report database errors before the result is touched. Processing continues
# afterwards, as before.
# A method call is not interpolated inside a double-quoted string, so errstr
# is handed to print as a separate list element.
if($conn->err)
{
    print "\# ", $conn->errstr, "\nexecuting: $conn->{Statement}\n"
}
elsif($DBI::err)
{
    print "\# ", $DBI::errstr, "\nexecuting: $SELECT\n"
};

# A failed query leaves $result undefined; continue with an empty row list.
$result ||= [];
$ntuples = scalar(@$result);
		
# Read out the result
#
# Writes one tab-separated line per word row: the eight selected columns
# followed by the running occurrence number of that word. Occurrences are
# counted per speaker/texttype/style/word combination, in the order in which
# the rows were returned by the query.
# NOTE(review): the "Value" column header speaks of "style&recording", but
# recnr is not part of the counting key -- confirm which one is intended.
my %WordOccurenceCount = ();

# Resolve every attribute name to its column index once, instead of once per
# cell. Skipped when there are no rows; the lookup was never reached in that
# case before either.
my @ColumnIndex = ();
if($result && @$result)
{
	@ColumnIndex = map { $sth->{NAME_lc_hash}{$_}; } @AttributeNames;
};

foreach my $Row (@$result)
{
	my ($id, $speaker, $texttype, $style, $word, $recnr, $ordernbr, $sentencepos)
	    = @{$Row}[@ColumnIndex];
	
	# Add one to the word-count
	my $CurrentEntry = "$speaker;;$texttype;;$style;;$word";
	++$WordOccurenceCount{$CurrentEntry};
	print DATABASE
	"$id\t$speaker\t$texttype\t$style\t$word\t$recnr\t$ordernbr\t$sentencepos\t$WordOccurenceCount{$CurrentEntry}\n";
	
};

# Output is buffered, so a write error (a full disk, for instance) may only
# show up when the handle is closed; do not let it pass unnoticed.
close(DATABASE)
    or die "Database close $TableDirectory/GivenNewWordsTable.txt: $!\n";
