#
# Create_JS_wordlists.praat
#
# Praat script converting wordlist files into Javascript code
#
# Copyright (C) 2016  R.J.J.H. van Son and the Netherlands Cancer Institute
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
#

form Source directory and target file
    sentence Source_directory ../wordlists
    sentence Target_file wordlists_plus.js
endform

Text writing preferences... UTF-8

call tables2javascript "'source_directory$'" 'target_file$'

# Definitions

# Write every *.Table file found below .sourceDir$ into .targetFile$ as one
# Javascript array literal assigned to "var wordlists".
#
# .sourceDir$   Directory that is searched (including its subdirectories)
# .targetFile$  Javascript file to write; an existing file is deleted first
#
# Exits the script with an error message when .sourceDir$ is not readable.
procedure tables2javascript .sourceDir$ .targetFile$
    # Iterate over all tables in the directory
    if fileReadable(.sourceDir$)
        deleteFile(.targetFile$)
        # Start new file
        # The header names the file actually being written (the procedure
        # argument), not the global form variable.
        fileappend '.targetFile$' /* '.targetFile$''newline$'
        fileappend '.targetFile$' * 'newline$'
        fileappend '.targetFile$' * Tables and this code are licensed under the GNU GPL version 2'newline$'
        fileappend '.targetFile$' * or later.'newline$'
        fileappend '.targetFile$' * 'newline$'
        fileappend '.targetFile$' */ 'newline$'
        fileappend '.targetFile$' 'newline$'
        fileappend '.targetFile$' var wordlists = ['newline$'
        #
        # The scratch table collecting the table names is named after the
        # target file: the part between the last "/" and the last ".".
        .nameStart = rindex(.targetFile$, "/") + 1
        .dotPosition = rindex(.targetFile$, ".")
        # No extension after the last "/": use everything up to the end
        if .dotPosition < .nameStart
            .dotPosition = length(.targetFile$) + 1
        endif
        .nameLength = .dotPosition - .nameStart
        .newTableNameList$ = mid$(.targetFile$, .nameStart, .nameLength)
        Create Table with column names... '.newTableNameList$' 0 Name

        # Create a list of Tables with Paths
        Create Table with column names... ListOfTables 0 Name Directory
        recursion = 0
        call createListOfTables ListOfTables '.sourceDir$'

        select Table ListOfTables
        .numOfTables = Get number of rows
        for .i to .numOfTables
            select Table ListOfTables
            .table$ = Get value... '.i' Name
            .tableDir$ = Get value... '.i' Directory

            # Get Table
            Read from file... '.tableDir$'/'.table$'
            .tableName$ = selected$("Table")
            # Files with the generic names "wordlist" or "table" are renamed
            # to the name of the directory that contains them.
            if .tableName$ = "wordlist" or .tableName$ = "table"
                .nameStart = rindex(.tableDir$, "/") + 1
                .nameLength = length(.tableDir$) + 1 - .nameStart
                .newTableName$ = mid$(.tableDir$, .nameStart, .nameLength)
                select Table '.tableName$'
                Rename... '.newTableName$'
                .tableName$ = selected$("Table")
            endif

            # Remember the name of this table
            select Table '.newTableNameList$'
            Append row
            .currentTableNum = Get number of rows
            Set string value... '.currentTableNum' Name '.tableName$'

            # Convert table
            call table2objectlist '.tableName$' '.targetFile$'
            # Close the entry of this table; a comma separates it from the next
            fileappend '.targetFile$' 'tab$''tab$']
            if .i < .numOfTables
                fileappend '.targetFile$' ,
            endif
            fileappend '.targetFile$' 'newline$'

            select Table '.tableName$'
            Remove
        endfor

        # Close the file
        fileappend '.targetFile$' 'tab$']'newline$''newline$'

        select Table '.newTableNameList$'
        plus Table ListOfTables
        Remove
    else
        exit Directory not found: '.sourceDir$'
    endif
endproc

# Convert a single table to a Javascript array and append it to .targetFile$
#
# .tableName$   Name of a Table object in the Praat object list
# .targetFile$  File the Javascript text is appended to
#
# Output: the table name (underscores replaced by spaces) followed by one
# array per row. Each row holds, in this order: Pinyin as stored, Pinyin
# converted by numbers2pinyin, Character, Translation, Lesson, Sound.
# A missing column or an empty cell is written as "-".
# The closing bracket of the table entry is written by the caller.
#
# NOTE(review): values are written verbatim between double quotes, so a
# value that itself contains a double quote ends the Javascript string early.
procedure table2objectlist .tableName$ .targetFile$
    select Table '.tableName$'
    # Collect information
    .numberOfRows = Get number of rows

    # The table name is used as the display name of the wordlist
    .tableVariableName$ = replace_regex$(.tableName$, "_", " ", 0)

    # Start output
    fileappend '.targetFile$' 'tab$''tab$'['newline$'
    fileappend '.targetFile$' 'tab$''tab$'"'.tableVariableName$'", ['newline$'

    # Columns that are exported, in output order
    .labelList$[1] = "Pinyin"
    .labelList$[2] = "Character"
    .labelList$[3] = "Translation"
    .labelList$[4] = "Lesson"
    .labelList$[5] = "Sound"

    # Write the rows
    for .row to .numberOfRows
        fileappend '.targetFile$' 'tab$''tab$''tab$'[
        # Empty before the first value, a comma before every later value
        .separator$ = ""
        for .col to 5
            .label$ = .labelList$[.col]
            .colIDX = Get column index: .label$
            if .colIDX > 0
                .value$ = Get value... '.row' '.label$'
            else
                .value$ = "-"
            endif
            if .value$ = ""
                .value$ = "-"
            endif
            fileappend '.targetFile$' '.separator$'"'.value$'"
            .separator$ = ","
            # The Pinyin value is followed by its version with tone marks
            if .label$ = "Pinyin"
                call numbers2pinyin '.value$'
                fileappend '.targetFile$' '.separator$'"'numbers2pinyin.pinyin$'"
            endif
        endfor
        fileappend '.targetFile$' ]
        if .row < .numberOfRows
            fileappend '.targetFile$' ,
        endif
        fileappend '.targetFile$' 'newline$'
    endfor
    # Close the list of rows
    fileappend '.targetFile$' 'tab$''tab$''tab$']'newline$'
endproc

# Collect all *.Table files in .topDirectory$ and, recursively, in its
# subdirectories.
#
# .listName$      Name of the table to receive all file names
#                 Labels are Name and Directory
# .topDirectory$  Directory where the search starts
#
# Who says you cannot do recursion in Praat?
# This is eerily fragile code.
#
# Local variables of a procedure are shared between all of its invocations.
# Every value that is needed after a recursive call is therefore stored in
# a variable whose name contains the current value of the global counter
# "recursion". The caller must set recursion to 0 before the first call.
# NOTE(review): while a nested call runs, more than one "Strings Directories"
# object exists; the code relies on select picking the intended one. Confirm
# against the Praat manual before changing the statement order.
recursion = 0
procedure createListOfTables .listName$ .topDirectory$
    recursion += 1
    .listName'recursion'$ = .listName$
    .topDirectory'recursion'$ = .topDirectory$

    # Files
    .currentTopDirectory$ = .topDirectory'recursion'$
    Create Strings as file list... Files '.currentTopDirectory$'/*.Table
    .numOfFiles'recursion' = Get number of strings
    for .i to .numOfFiles'recursion'
        select Strings Files
        .table'recursion'$ = Get string... '.i'
        .currentListName$ = .listName'recursion'$
        select Table '.currentListName$'
        # Add one row with the file name and its directory
        Append row
        .numRows = Get number of rows
        .currentTable$ = .table'recursion'$
        .currentTopDirectory$ = .topDirectory'recursion'$
        Set string value... '.numRows' Name '.currentTable$'
        Set string value... '.numRows' Directory '.currentTopDirectory$'
    endfor
    select Strings Files
    Remove

    # Recurse into directories
    .currentTopDirectory$ = .topDirectory'recursion'$
    Create Strings as directory list... Directories '.currentTopDirectory$'
    .numOfDirectories'recursion' = Get number of strings
    # The loop counter carries the recursion depth in its name, so a nested
    # call does not overwrite it
    for .i'recursion' to .numOfDirectories'recursion'
        select Strings Directories
        .currentI = .i'recursion'
        .directory'recursion'$ = Get string... '.currentI'
        .currentTopDirectory$ = .topDirectory'recursion'$
        .currentDirectory$ = .directory'recursion'$
        call createListOfTables '.listName$' '.currentTopDirectory$'/'.currentDirectory$'
    endfor
    select Strings Directories
    Remove

    recursion -= 1
endproc

# Convert pinyin with tone numbers (ni3hao3) to pinyin with tone marks
#
# .numberstext$  Pinyin text; tones are the digits 0-4, v stands for u-umlaut
#
# The result is returned in numbers2pinyin.pinyin$
#
# NOTE(review): a "v" that does not end up directly before a tone digit
# (the tone went to another vowel, or there is no tone digit) is left as "v".
procedure numbers2pinyin .numberstext$
    .intermediatePinyin$ = .numberstext$
    # Add an apostrophe after a tone digit that stands between two vowels
    .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "([aeuiov])([0-9])([aeuiov])", "\1\2'\3", 0)
    # Move numbers to the nucleus vowel
    # To the vowel
    .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "([aeuiov])([^aeuiov0-9]*)([0-9])", "\1\3\2", 0)
    # Either a/e
    .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "([ae])([aeuiov]*)([0-9])", "\1\3\2", 0)
    # Or the Oo in /ou/
    .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "(ou)([0-9])", "o\2u", 0)
    # or the second vowel
    .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "([uiov][aeuiov])([uiov])([0-9])", "\1\3\2", 0)

    # Convert all tones to special characters
    # Tone 1
    .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "a1", "ā", 0)
    .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "e1", "ē", 0)
    .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "u1", "ū", 0)
    .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "i1", "ī", 0)
    .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "o1", "ō", 0)
    .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "v1", "ǖ", 0)

    # Tone 2
    .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "a2", "á", 0)
    .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "e2", "é", 0)
    .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "u2", "ú", 0)
    .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "i2", "í", 0)
    .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "o2", "ó", 0)
    .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "v2", "ǘ", 0)

    # Tone 3
    .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "a3", "ǎ", 0)
    .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "e3", "ě", 0)
    .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "u3", "ǔ", 0)
    .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "i3", "ǐ", 0)
    .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "o3", "ǒ", 0)
    .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "v3", "ǚ", 0)

    # Tone 4
    .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "a4", "à", 0)
    .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "e4", "è", 0)
    .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "u4", "ù", 0)
    .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "i4", "ì", 0)
    .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "o4", "ò", 0)
    .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "v4", "ǜ", 0)

    # Tone 0
    # The v must be converted BEFORE the tone 0 symbol is removed, otherwise
    # the pattern v0 can never match and the v is left in the output
    .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "v0", "ü", 0)
    # Remove tone 0 symbol completely; the other vowels keep their plain form
    .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "0", "", 0)

    # Pick best vowel symbols available in cases not caught before
    # Ugly clutch to get the 1, 3, 0 tone diacritics at least in the neighbourhood
    # NOTE(review): this pattern matches Praat backslash trigraphs, which the
    # replacements above do not produce - presumably left over from a version
    # that generated Praat text; confirm before removing.
    .intermediatePinyin$ = replace_regex$(.intermediatePinyin$, "i(\\[-N0]\^)", "i\\s{_ }\1", 0)

    .pinyin$ = .intermediatePinyin$
endproc

# Replace Praat backslash sequences for accented letters in .text$ by the
# hexadecimal escapes of their UTF-8 byte sequences.
#
# .text$  Text with Praat special-symbol sequences
#
# The result is left in convert_praat_to_utf8.text$
# This procedure is not called anywhere in this file.
procedure convert_praat_to_utf8 .text$
    # Vowels with diaeresis
    .text$ = replace_regex$(.text$, "\\a""", "\xc3\xa4", 0)
    .text$ = replace_regex$(.text$, "\\A""", "\xc3\x84", 0)
    .text$ = replace_regex$(.text$, "\\o""", "\xc3\xb6", 0)
    .text$ = replace_regex$(.text$, "\\O""", "\xc3\x96", 0)
    .text$ = replace_regex$(.text$, "\\u""", "\xc3\xbc", 0)
    .text$ = replace_regex$(.text$, "\\U""", "\xc3\x9c", 0)
    .text$ = replace_regex$(.text$, "\\i""", "\xc3\xaf", 0)
    .text$ = replace_regex$(.text$, "\\I""", "\xc3\x8f", 0)
    .text$ = replace_regex$(.text$, "\\e""", "\xc3\xab", 0)
    .text$ = replace_regex$(.text$, "\\E""", "\xc3\x8b", 0)
    .text$ = replace_regex$(.text$, "\\y""", "\xc3\xbf", 0)
    # Capital Y with diaeresis is U+0178, UTF-8 bytes C5 B8. The bytes C3 9F
    # used here before are the sharp s that is handled below.
    .text$ = replace_regex$(.text$, "\\Y""", "\xc5\xb8", 0)
    # e with acute accent
    .text$ = replace_regex$(.text$, "\\e'", "\xc3\xa9", 0)
    .text$ = replace_regex$(.text$, "\\E'", "\xc3\x89", 0)
    # Sharp s
    .text$ = replace_regex$(.text$, "\\ss", "\xc3\x9f", 0)
    # NOTE(review): C3 95 is the UTF-8 encoding of O with tilde; if the
    # sequence bu is meant to be a bullet, the bytes would be E2 80 A2.
    # Left unchanged - confirm the intended character.
    .text$ = replace_regex$(.text$, "\\bu", "\xc3\x95", 0)
endproc