#!/usr/bin/env python3
import sys
import csv
import getopt
import random
import re
import ast

class Sentence:
    """Simple container for a parsed stimulus: its carrier phrase and the
    list of words (see readfile)."""
    pass

class Word:
    """Object defining the contents and properties of a word"""
    def __init__(self, word, syllables, stress):
        # Actual word or None for a placeholder
        self.word = word
        # Syllables count
        self.syllables = syllables
        # Stress position
        self.stress = stress
   
class WordPicker:
    def __init__(self, words_by_feature):
        self.words_by_feature = words_by_feature

    def get(self, position_features, word_features, exclude = [], 
            enumeration_length = 1):
        """Get one, or a list of words. They are randomly picked from the
        dictionary, matched on their word form."""
        random.shuffle(self.words_by_feature[word_features])
        
        if len(exclude) >= 1 and type(exclude[0]) is not str:
            exclude = unwrap_words(exclude)
        
        # Gets a (copied) slice of the desired length.
        enumeration = [x for x in self.words_by_feature[word_features]
                       if (x.word not in exclude)][:enumeration_length]
                    
        return enumeration[0] if enumeration_length == 1 else enumeration

class DummyWordPicker:
    """This picker returns empty words, marked with the passed features.
    These dummy words are used as place holders.
    
    """
    def get(self, position_features, word_features, exclude = [], 
            enumeration_length = 1):
        syllables, stress = word_features
        return Word(None, syllables, stress)
    
class ControlWordPicker(WordPicker):
    def __init__(self, parent_picker, control_word, max_occurrence = 1):
        """Initializes the control word picker. This allows limiting how
        often the control word is used at each (enumeration_length, position).
        
        Keyword arguments:
        
        parent_picker -- parent WordPicker object
        control_word -- must be a Word containing the features
        max_occurrence -- specifies how often the control word can occur
                          on a specific (enumeration_length, position).
        
        """
        self.parent_picker = parent_picker
        self.max_occurrence = max_occurrence
        self.control_word = control_word
        # States how often the control word was used per
        # (enumeration_length, position). Kept as an instance attribute so
        # that pickers do not share state through the class.
        self.matcher = {}
       
    def reset(self):
        self.matcher = {} 
        
    def get(self, position_features, word_features, exclude = [], 
            enumeration_length = 1):
        if len(exclude) >= 1 and not isinstance(exclude[0], str):
            exclude = unwrap_words(exclude)
            
        # The control word is only given if it has not already been given
        # too often at this (enumeration_length, position) and the
        # placeholder has the same features as the control word.
        if (word_features != (self.control_word.syllables, 
                              self.control_word.stress) or
                (self.control_word.word in exclude) or
                (position_features in self.matcher and
                 self.matcher[position_features] == self.max_occurrence)):
            # Delegate to the parent picker; pass position_features, not self.
            return self.parent_picker.get(position_features, word_features,
                                          exclude, enumeration_length)
        else:
            # New sentence position: the control word may be inserted here.
            self.matcher[position_features] = (
                self.matcher.get(position_features, 0) + 1)
                
            return self.control_word
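
# Illustrative sketch (not used by the script itself): shows how the pickers
# above compose. The words and features below are made-up example data.
def _picker_example():
    # Hypothetical two-syllable words with stress on the first syllable.
    example_words = [Word('apple', 2, 1), Word('borrow', 2, 1),
                     Word('copy', 2, 1)]
    by_feature = {(2, 1): list(example_words)}
    
    picker = WordPicker(by_feature)
    control = ControlWordPicker(DummyWordPicker(), example_words[0],
                                max_occurrence=1)
    
    # The control picker yields the control word at most max_occurrence times
    # per (enumeration_length, position); afterwards it defers to its parent,
    # here a DummyWordPicker that returns a placeholder (word is None).
    first = control.get(position_features=(3, 1), word_features=(2, 1))
    second = control.get(position_features=(3, 1), word_features=(2, 1))
    
    # A real word with the same features, excluding the control word.
    filler = picker.get(position_features=(3, 2), word_features=(2, 1),
                        exclude=[first])
    
    return first.word, second.word, filler.word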
 
def unwrap_words(lst):
    """Converts a list of Word instances to the list of strings stored in
    their .word attributes."""
    return [x.word for x in lst if x is not None]

def generate_sentences(words, carriers, control_word, stimuli_groups, 
                       min_enumeration_length, max_enumeration_length, 
                       conjunction, verbose):
    """Generate the randomly ordered sentences which are to be shown to the
    respondent."""

    control = Word(control_word, None, None)

    words = parse_words_csv(words) 
    carriers = [""] if carriers == None else parse_carriers(carriers)
    
    try:
        control.syllables, control.stress = [(x.syllables, x.stress)
                                             for x in words if
                                             x.word == control.word][0]
    except IndexError:
        raise Exception('Control word not found in the word list: ' +
                        control.word)
    
    syllable_lengths, stress_positions, words_by_feature = get_word_properties(
        words)
            
    placeholder_creator = DummyWordPicker()
    control_word_picker = ControlWordPicker(placeholder_creator, control, 2)
    
    # generate the first stimuli group
    enumerations_by_length = generate_enumerations_by_length(
        min_enumeration_length,
        max_enumeration_length,
        syllable_lengths,
        stress_positions,
        placeholder_creator)
    
    shuffle_columns(min_enumeration_length, max_enumeration_length, 
                    enumerations_by_length, control_word_picker)
    
    for i in range(1, stimuli_groups):            
        # append some more enumerations
        additional_enumerations = generate_enumerations_by_length(
                min_enumeration_length,
                max_enumeration_length,
                syllable_lengths,
                stress_positions,
                placeholder_creator)
        
        shuffle_columns(min_enumeration_length, max_enumeration_length,
                        additional_enumerations, control_word_picker)
        
        for length, enumerations in additional_enumerations.items():
            enumerations_by_length[length] += enumerations
    
    word_picker = WordPicker(words_by_feature)
    
    # generated stimuli enumerations
    stimuli = []
    
    fill_placeholders(enumerations_by_length, control_word_picker)
    
    for i in enumerations_by_length:
        for enumeration in enumerations_by_length[i]:
            generated_sentence = ''
            
            # Fill in the remaining placeholder words with real words
            for j in range(0, len(enumeration)):
                if enumeration[j].word is None:
                    enumeration[j] = word_picker.get(
                        position_features=(i, j + 1),
                        word_features=(enumeration[j].syllables, 
                                       enumeration[j].stress), 
                        exclude=[x.word for x in enumeration[0:j] 
                                 if x.word is not None])
            
            generated_sentence += ', '.join([x.word for x in enumeration[:-1]]) 
            generated_sentence += ' ' + conjunction + ' ' + enumeration[-1].word
                
            word_features = [(x.syllables, x.stress) for x in enumeration]
            
            stimuli.append((generated_sentence, word_features))
            
    random.shuffle(stimuli)
    
    carrier_count = 0
    previous_carrier = ''
    
    # Add the carriers to the stimuli
    for i in range(0, len(stimuli)):
        generated_sentence = stimuli[i][0]
        
        while True:
            # Reshuffle the carriers at the start of each full cycle.
            if carrier_count % len(carriers) == 0:
                random.shuffle(carriers)
                
            carrier = carriers[carrier_count % len(carriers)]

            # Avoid repeating the previous carrier. With only one carrier
            # there is no alternative, so accept it to avoid an endless loop.
            if carrier != previous_carrier or len(carriers) == 1:
                break
        
        stimuli[i] = (carrier + ' ' + generated_sentence, stimuli[i][1])
        previous_carrier = carrier
        carrier_count += 1
        
    return stimuli
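
# Example call (hypothetical file names; normally this module is driven from
# the command line):
#
#   stimuli = generate_sentences('words.csv', 'carriers.txt', 'apple',
#                                stimuli_groups=2, min_enumeration_length=3,
#                                max_enumeration_length=5, conjunction='and',
#                                verbose=False)
#
# Each entry of the returned list is a (sentence, word_features) tuple, where
# word_features is a list of (syllables, stress) pairs.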
   
def fill_placeholders(enumerations_by_length, word_picker):
    """Fill in the placeholder words (word is None) using the given word
    picker, excluding words already used earlier in the same enumeration."""
    for i in enumerations_by_length:
        for enumeration in enumerations_by_length[i]:
            # Fill in the placeholder words
            for j in range(0, len(enumeration)):
                if enumeration[j].word is None:
                    enumeration[j] = word_picker.get(
                        position_features = (i,
                                             j + 1), 
                        word_features = (enumeration[j].syllables,
                                         enumeration[j].stress), 
                        exclude = [x.word for x in enumeration[0:j] if
                                   x.word is not None])

def shuffle_columns(min_enumeration_length, max_enumeration_length,
                    enumerations_by_length, control_word_picker):
    """Shuffle the placeholders per position (column) across the enumerations
    of each length, inserting the control word where the picker allows it."""
    def check_control(placeholders, enumerations, position):
        # Check that the control word does not occur twice in one enumeration.
        if position == 0:
            # No need to check the first one: it has no predecessors.
            return True
        
        for i in range(0, len(enumerations)):
            enumeration = enumerations[i]
            
            if placeholders[i].word is not None:                
                for j in range(0, position):
                    if placeholders[i].word == enumeration[j].word:
                        return False
                    
        return True
                
    for length in range(min_enumeration_length, max_enumeration_length + 1):
        for position in range(0, length): 
            # randomize place holders over the enumerations
            placeholders = []
            for enumeration in enumerations_by_length[length]:
                # fill in the control word?
                position_features = (length, position + 1)
                word_features = (enumeration[position].syllables,
                                 enumeration[position].stress)
                control_word = control_word_picker.get(position_features, 
                                                       word_features)
                
                placeholders.append(control_word 
                                    if control_word.word is not None 
                                    else enumeration[position])
            
            checked = False
            
            while not checked:
                random.shuffle(placeholders)
                checked = check_control(placeholders, 
                                        enumerations_by_length[length],
                                        position)
            
            # store the shuffled place holders in the enumerations
            for i in range(0, len(enumerations_by_length[length])):
                enumeration = enumerations_by_length[length][i]
                enumeration[position] = placeholders[i]
 
def generate_enumerations_by_length(min_enumeration_length, 
                                    max_enumeration_length, 
                                    syllable_lengths,
                                    stress_positions,
                                    word_picker):
    """Gets the enumeration by enumeration length (i.e. the number of words)"""
    # the enumerations by number of words
    enumerations_by_length = {}
          
    # number of words in the stimulus enumeration
    for enumeration_length in range(min_enumeration_length, 
                                    max_enumeration_length + 1):
        enumerations_by_length[enumeration_length] = []
        
        # generate word enumeration for each possible syllable length (in the 
        # corpus file) and stress position
        for syllables in syllable_lengths:
            for stress in stress_positions[syllables]:
                # get a list of random words, containing n number of words,
                # with n being the desired enumeration length
                word_features = (syllables, stress)
                enumeration = []
                
                for i in range(1, enumeration_length + 1):
                    position_features = (enumeration_length, i)
                    
                    # get the words, using the passed word picker, exclude
                    # the previously set words
                    enumeration.append(word_picker.get(position_features, 
                                                       word_features, 
                                                       enumeration))
                    
                enumerations_by_length[enumeration_length].append(enumeration)                

    return enumerations_by_length
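
# The returned structure maps each enumeration length to a list of
# enumerations, one per (syllables, stress) combination. Illustrative shape:
#
#   {3: [[<Word None 2 1>, <Word None 2 1>, <Word None 2 1>], ...],
#    4: [...], ...}
#
# With a DummyWordPicker every entry is a placeholder (word is None).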

def check_stimuli_file(filename, conjunction, carriers, words):
    """Check the consistency of a stimuli file"""
    parsed = []
    line_number = 0
    
    with open(filename, 'rt') as file:
        for line in map(lambda s: s.strip(), file.readlines()):
            line_number += 1
            
            parsed.append(line)
            
            parsed_words = []
            check_carrier, check_words = line.split(': ', 1)
            if check_carrier + ':' not in carriers:
                raise Exception(str(line_number) + ": Carrier invalid: " + check_carrier)
            
            for word in re.split(' ' + re.escape(conjunction) + ' |, ',
                                 check_words):
                for x in words:
                    if x.word == word:
                        parsed_words.append((x.syllables, x.stress))
                        break
                else:
                    raise Exception(str(line_number) + ": Word not found: " + word)
            
            parsed.append(str(parsed_words))
            
    # This should match the input file perfectly
    return parsed
    
def parse_carriers(filename):
    """Read the carrier phrases from a file, one carrier per line."""
    with open(filename, 'rt') as file:
        return [line.rstrip('\n') for line in file.readlines()] 
    
def parse_words_csv(filename, delimiter=';', quotechar='"'):
    """Parse the word corpus CSV (header: word;syllables;stress) into a list
    of Word objects."""
    words = []
    
    # open CSV
    with open(filename, 'rt') as csvfile:
        wordsreader = csv.reader(csvfile, 
                                 delimiter=delimiter, 
                                 quotechar=quotechar)
        
        # loop through all rows, and convert them to data objects
        first_row = True
        for row in wordsreader:
            if first_row:
                # check headers
                if (len(row) != 3 or
                    row[0] != 'word' or
                    row[1] != 'syllables' or 
                    row[2] != 'stress'):
                    raise Exception('Invalid header, must be: '
                                    'word;syllables;stress. Use -h for help')
                first_row = False 
            else:
                words.append(Word(row[0], int(row[1]), int(row[2])))
                
    # return the data object with all the words
    return words
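
# Expected corpus file layout (the data rows below are made-up examples):
#
#   word;syllables;stress
#   apple;2;1
#   banana;3;2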

def get_word_properties(words):
    """Collect the syllable lengths, the stress positions per syllable length,
    and the words grouped by (syllables, stress) feature pair."""
    syllable_lengths = []
    stress_positions = {}
    # words for each syllable length X stress position
    words_by_feature = {}
    
    # TODO: check if each combination is large enough!
    for word in words:
        if word.syllables not in syllable_lengths:
            syllable_lengths.append(word.syllables)
        if word.syllables not in stress_positions:
            stress_positions[word.syllables] = []
        if word.stress not in stress_positions[word.syllables]:
            stress_positions[word.syllables].append(word.stress)
        word_features = word.syllables, word.stress
        if word_features not in words_by_feature:
            words_by_feature[word_features] = [word]
        else:
            words_by_feature[word_features].append(word)
    
    return syllable_lengths, stress_positions, words_by_feature
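
# Illustrative example: given Word('apple', 2, 1) and Word('banana', 3, 2),
# get_word_properties returns
#
#   syllable_lengths -> [2, 3]
#   stress_positions -> {2: [1], 3: [2]}
#   words_by_feature -> {(2, 1): [apple], (3, 2): [banana]}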

def readfile(stimuli_filename, stimuli_count):
    """Read stimuli_count generated stimuli (sentence line plus word
    properties line) back into Sentence objects."""
    sentences = []
    
    with open(stimuli_filename, 'rt') as stimuli_file:
        for i in range(0, stimuli_count):
            line = stimuli_file.readline().rstrip('\n')
            
            carrier, words = line.split(':', 1)
            words = words[1:].split(', ')
            
            # the last two words are joined by the conjunction; split them
            last_word = words[-1]
            words[-1] = last_word.split(' ')[0]
            words.append(last_word.split(' ')[-1])
            
            sentence = Sentence()
            sentence.carrier = carrier
            sentence.words = words
            
            sentences.append(sentence)
            
            # add the sentence properties; literal_eval only accepts Python
            # literals and is therefore safer than eval
            word_properties = ast.literal_eval(stimuli_file.readline())
            
            for j in range(0, len(word_properties)):
                syllables, stress = word_properties[j]
                sentence.words[j] = Word(sentence.words[j], syllables, stress)
    
    return sentences
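
# readfile expects pairs of lines as written by the generator, for example
# (illustrative content):
#
#   He said: apple, borrow and copy
#   [(2, 1), (2, 1), (2, 1)]
#
# The first line is "<carrier> <enumeration>" (the carrier typically ends in
# a colon); the second line is the list of (syllables, stress) pairs.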
