; CZ1 mbrola: czech voice for festival. Requires festival, mbrola and
; cz1 database.
;
; Master copy is located at http://atrey.karlin.mff.cuni.cz/~pavel/cz1_mbrola.scm.
; Please send patches to pavel@ucw.cz.
;
; Put this into lib/voices/czech/cz1_mbrola/festvox/ and put cz1
; database to lib/voices/czech/cz1_mbrola.
;
; Then do (set! voice_default 'voice_cz1_mbrola) to use it.
;
;
; Copyright 2000 Pavel Machek
;
; Version 0.4.2
;
; You MAY use this software under terms of GNU GPL, or under following license:
;
;  Permission is hereby granted, free of charge, to use and distribute
;  this software and its documentation without restriction, including
;  without limitation the rights to use, copy, modify, merge, publish,
;  distribute, sublicense, and/or sell copies of this work, and to
;  permit persons to whom this work is furnished to do so, subject to
;  the following conditions:
;   1. The code must retain the above copyright notice, this list of
;      conditions and the following disclaimer.
;   2. Original authors' names are not deleted.
;
;  EVERYONE
;  DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
;  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT
;  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE
;  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
;  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
;  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
;  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
;  THIS SOFTWARE.
;
; Festival is buggy, and ignores current locale setting. It also has
; lowercase letters hardcoded to a-z, and similar fatal bugs
; w.r.t. localization. I try to workaround it.
;
; FIXME: should create my own _accent_cart_tree and use it
;

;; Directory of this voice, looked up under the key cz1_mbrola in
;; voice-locations.  Used below to build the MBROLA database path.
(set! cz1_mbrola_dir (cdr (assoc 'cz1_mbrola voice-locations)))

(require 'mbrola)

;;; ------------------------------------------------------------------
;;; Phone set
;;; ------------------------------------------------------------------
(defPhoneSet
  czech
  ;;;  Phone Features
  (;; vowel or consonant
   (vc + -)
   ;; vowel length: short long diphthong schwa
   (vlng s l d a 0)
   ;; vowel height: high mid low
   (vheight 1 2 3 -)
   ;; vowel frontness: front mid back
   (vfront 1 2 3 -)
   ;; lip rounding
   (vrnd + -)
   ;; consonant type: stop fricative affricative nasal liquid
   (ctype s f a n l 0)
   ;; place of articulation: labial alveolar palatal labio-dental
   ;;                         dental velar
   (cplace l a p b d v 0)
   ;; consonant voicing
   (cvox + -)
   )
  ;; Phone set members (features not set properly: copied from spanish)
  (
   (_  - 0 - - - 0 0 -)
   (a  + l 3 1 - 0 0 -)
   (a: + l 3 1 - 0 0 -)
   (b  - 0 - - + s l +)
   (c  - 0 - - + s v +)
   (d  - 0 - - + s a +)
   (D  - 0 - - + s a +)
   (e  + l 2 1 - 0 0 -)
   (e: + l 2 1 - 0 0 -)
   (f  - 0 - - + f b -)
   (g  - 0 - - + s p +)
   (h  - 0 - - + a a -)
   (i  + l 1 1 - 0 0 -)
   (i: + l 1 1 - 0 0 -)
   (j  - 0 - - + l a +)
   (k  - 0 - - + s p -)
   (l  - 0 - - + l d +)
   (m  - 0 - - + n l +)
   (n  - 0 - - + n d +)
   (N  - 0 - - + n d +)
   (o  + l 3 3 - 0 0 -)
   (o: + l 3 3 - 0 0 -)
   (p  - 0 - - + s l -)
   (r  - 0 - - + l p +)
   (r\ - 0 - - + l p +)
   (s  - 0 - - + f a +)
   (S  - 0 - - + f a +)
   (t  - 0 - - + s v +)
   (T  - 0 - - + s v +)
   (u  + l 1 3 + 0 0 -)
   (u: + l 1 3 + 0 0 -)
   (v  - 0 - - + n l +)
   (x  - 0 - - + a a -)
   (z  - 0 - - + f a +)
   (Z  - 0 - - + f a +)
   (tS - 0 - - + s v +)
  )
)

;;; ------------------------------------------------------------------
;;; Duration data
;;; ------------------------------------------------------------------

;; One (phone 0.0 value) entry per phone of the czech phone set;
;; installed as duration_ph_info by voice_cz1_mbrola.
(set! czech_el_phone_data
; This is also not right
'(
   (_  0.0 0.250)
   (a  0.0 0.090)
   (a: 0.0 0.150)
   (b  0.0 0.065)
   (c  0.0 0.065)
   (d  0.0 0.060)
   (D  0.0 0.060)
   (e  0.0 0.090)
   (e: 0.0 0.150)
   (f  0.0 0.100)
   (g  0.0 0.080)
   (h  0.0 0.135)
   (i  0.0 0.080)
   (i: 0.0 0.150)
   (j  0.0 0.100)
   (k  0.0 0.100)
   (l  0.0 0.080)
   (m  0.0 0.070)
   (n  0.0 0.080)
   (N  0.0 0.080)
   (o  0.0 0.090)
   (o: 0.0 0.150)
   (p  0.0 0.100)
   (r  0.0 0.060)
   (r\ 0.0 0.070)
   (s  0.0 0.110)
   (S  0.0 0.110)
   (t  0.0 0.085)
   (T  0.0 0.085)
   (u  0.0 0.080)
   (u: 0.0 0.150)
   (v  0.0 0.100)
   (x  0.0 0.135)
   (z  0.0 0.110)
   (Z  0.0 0.110)
   (tS 0.0 0.110)
))

;; Decision tree installed as duration_cart_tree by voice_cz1_mbrola.
;; Leaves depend on clause position (syl_break) and syllable stress.
(set! czech_dur_tree
 '
   ((R:SylStructure.parent.R:Syllable.p.syl_break > 1 ) ;; clause initial
    ((R:SylStructure.parent.stress is 1)
     ((1.5))
     ((1.2)))
    ((R:SylStructure.parent.syl_break > 1)   ;; clause final
     ((R:SylStructure.parent.stress is 1)
      ((2.0))
      ((1.5)))
     ((R:SylStructure.parent.stress is 1)
      ((1.2))
      ((1.0))))))

(PhoneSet.silences '(_))

;;; ------------------------------------------------------------------
;;; Lexicon
;;; ------------------------------------------------------------------
(lex.create "czech")
(lex.set.phoneset "czech")

;; Three explicit entries with the same phones; they differ only in which
;; syllable carries the stress mark.
(lex.add.entry
   '("pocitac" nil ( ((p o) 1) ((tS i:) 0) ((t a tS) 0) )) )
(lex.add.entry
   '("pocitaca" nil ( ((p o) 0) ((tS i:) 1) ((t a tS) 0) )) )
(lex.add.entry
   '("pocitacb" nil ( ((p o) 0) ((tS i:) 0) ((t a tS) 1) )) )

;;; ------------------------------------------------------------------
;;; Letter to sound rules
;;; ------------------------------------------------------------------
;;
;; NOTE(review): the accented letters below look like ISO-8859-2 Czech
;; text shown through a Latin-1 decoder (the token_to_words docstring in
;; this file mentions iso-8859-2).  They are matched literally by the
;; rules, so they are kept exactly as found -- confirm the encoding of
;; this file before re-saving it.
;;
;; Upper-case accented letters get their own rules because, as the header
;; explains, Festival's downcase only handles a-z.
(lts.ruleset
;  Name of rule set
 czech_ruleset
;  Sets used in the rules
(
  (MEKCIDLO i í ì Í Ì)
)
;  Rules
(
 ( [ a ] = a )
 ( [ á ] = a: )
 ;; Upper-case counterpart of the rule above; previously missing although
 ;; every other long vowel had both cases.
 ( [ Á ] = a: )
 ;; b before ì/Ì inserts j, same pattern as the p and v rules below.
 ( [ b ] ì = b j )
 ( [ b ] Ì = b j )
 ( [ b ] = b )
 ( [ c h ] = x )
 ( [ c ] = c )
 ( [ È ] = tS )
 ( [ è ] = tS )
 ( [ d ] MEKCIDLO = D )
 ( [ d ] = d )
 ( [ ï ] = D )
 ( [ Ï ] = D )
 ( [ e ] = e )
 ( [ é ] = e: )
 ;; Upper-case counterpart of the rule above; previously missing.
 ( [ É ] = e: )
 ( [ ì ] = e )
 ( [ Ì ] = e )
 ;; f before ì/Ì inserts j, same pattern as the p and v rules below.
 ( [ f ] ì = f j )
 ( [ f ] Ì = f j )
 ( [ f ] = f )
 ( [ g ] = g )
 ( [ h ] = h )
 ( [ i ] = i )
 ( [ í ] = i: )
 ( [ Í ] = i: )
 ( [ j ] = j )
 ( [ k ] = k )
 ( [ l ] = l )
 ( [ m ì ] = m N e )
 ( [ m Ì ] = m N e )
 ( [ m ] = m )
 ( [ n ] MEKCIDLO = N )
 ( [ n ] = n )
 ( [ ò ] = N )
 ( [ Ò ] = N )
 ( [ o ] = o )
 ( [ ó ] = o: )
 ( [ Ó ] = o: )
 ( [ p ] ì = p j )
 ( [ p ] Ì = p j )
 ( [ p ] = p )
 ( [ q ] = k v )
 ( [ r ] = r )
 ( [ ø ] = r\ )
 ( [ Ø ] = r\ )
 ( [ s ] = s )
 ( [ ¹ ] = S )
 ( [ © ] = S )
 ( [ t ] MEKCIDLO = T )
 ( [ t ] = t )
 ( [ » ] = T )
 ( [ « ] = T )
 ( [ u ] = u )
 ( [ ú ] = u: )
 ( [ Ú ] = u: )
 ( [ ù ] = u: )
 ( [ Ù ] = u: )
 ( [ v ] ì = v j )
 ( [ v ] Ì = v j )
 ( [ v ] = v )
 ( [ w ] = v )
 ( [ x ] = k s )
 ( [ y ] = i )
 ( [ ý ] = i: )
 ( [ Ý ] = i: )
 ( [ z ] = z )
 ( [ ¾ ] = Z )
 ( [ ® ] = Z )
 ;; Digits are spelled out one by one, each followed by a silence.
 ( [ 1 ] = j e d n a _ )
 ( [ 2 ] = d v a _ )
 ( [ 3 ] = t r\ i _ )
 ( [ 4 ] = tS t i r\ i _ )
 ( [ 5 ] = p j e t _ )
 ( [ 6 ] = S e s t _ )
 ( [ 7 ] = s e d m _ )
 ( [ 8 ] = o s m _ )
 ( [ 9 ] = d e v j e t _ )
 ( [ 0 ] = n u l a _ )
 ;; Punctuation that reaches the rules becomes a silence.
 ( [ "." ] = _ )
 ( [ "?" ] = _ )
 ( [ "-" ] = _ )
 ( [ ":" ] = _ )
 ( [ "," ] = _ )
 ( [ "_" ] = _ )
 ( [ ">" ] = _ )
 ( [ "<" ] = _ )
 ( [ "(" ] = _ )
 ( [ ")" ] = _ )
 ( [ "!" ] = _ )
))

(define (czech_lts word features)
  "(czech_lts WORD FEATURES)
Using letter to sound rules build a czech pronunciation of WORD.
Returns a lexical entry (WORD nil SYLLABLES); FEATURES is ignored."
  (list word
        nil
        (lex.syllabify.phstress (lts.apply (downcase word) 'czech_ruleset))))

;; Words not found in the lexicon are handed to czech_lts.
(lex.set.lts.method 'czech_lts)

;;; ------------------------------------------------------------------
;;; Phrase breaks
;;; ------------------------------------------------------------------

;; Break type chosen from the punctuation that ends the token:
;; BB after ? . :   B after ' " , ;   BB at end of utterance, else NB.
(set! czech_phrase_cart_tree
'
((lisp_token_end_punc in ("?" "." ":"))
  ((BB))
  ((lisp_token_end_punc in ("'" "\"" "," ";"))
   ((B))
   ((n.name is 0)  ;; end of utterance
    ((BB))
    ((NB))))))

;;; ------------------------------------------------------------------
;;; Tokenization
;;; ------------------------------------------------------------------
(define (czech_token_to_words token name)
  "(czech_token_to_words TOKEN NAME)
This is workaround for ugly bugs w.r.t. iso-8859-2 in core festival"
  ;; Every token becomes exactly one word, unchanged; TOKEN is not used.
  (list name)
)

;;; ------------------------------------------------------------------
;;; Accents
;;; ------------------------------------------------------------------

;; Defined but not installed: voice_cz1_mbrola uses the f2b accent tree
;; instead (see the FIXME in the file header).  The Accented leaves are
;; commented out, as in the original.
(set! czech_accent_cart_tree
  '
  ((R:SylStructure.parent.gpos is content)
   ((stress is 1)
;    ((Accented))
    ((position_type is single)
;     ((Accented))
     ((NONE))))
   ((NONE))))

;;; ------------------------------------------------------------------
;;; Voice definition
;;; ------------------------------------------------------------------
(define (voice_cz1_mbrola)
"(voice_cz1_mbrola)
 Set up the current voice to be female Czech using mbrola."
  ;; Phone set
  (voice_reset)
  (Parameter.set 'Language 'czech)
  (Parameter.set 'PhoneSet 'czech)
  (PhoneSet.select 'czech)

  ;; Tokenization rules
  (set! token_to_words czech_token_to_words)

  ;; POS tagger
;  (require 'pos)
  (set! pos_supported nil)
;  (set! pos_lex_name nil)
;  If I enable this, it just says nothing
  (set! pos_ngram_name 'english_pos_ngram)

  ;; Lexicon selection
  (lex.select "czech")

  ;; Phrase prediction
  (require 'phrase)
  (set! phrase_cart_tree czech_phrase_cart_tree)
  (Parameter.set 'Phrase_Method 'cart_tree)

  ;; Accent and tone prediction
  (require 'tobi)
  (set! int_tone_cart_tree f2b_int_tone_cart_tree)
  (set! int_accent_cart_tree f2b_int_accent_cart_tree)
  ;f2b_int gives better results than czech ?

  (set! postlex_vowel_reduce_cart_tree
        postlex_vowel_reduce_cart_data)

  ;; F0 prediction
  (set! int_simple_params '((f0_mean 180) (f0_std 15)))
  (Parameter.set 'Int_Target_Method 'Simple)
  (Parameter.set 'Int_Method 'Simple)

  ;; Duration prediction
  (set! duration_cart_tree czech_dur_tree)
  (set! duration_ph_info czech_el_phone_data)
  (Parameter.set 'Duration_Method Duration_Tree_ZScores)
  (Parameter.set 'Duration_Stretch 1.1)

  ;; Waveform synthesizer
  (set! us_abs_offset 0.0)
  (set! window_factor 1.0)
  (set! us_rel_offset 0.0)
  (set! us_gain 0.9)

  (Parameter.set 'Synth_Method 'MBROLA_Synth)
  (set! mbrola_progname "mbrola")
  ;; The trailing space inside the format string is part of the original
  ;; value and is kept as is.
  (set! mbrola_database
        (format
         nil
         "%s%s "
         cz1_mbrola_dir "cz1/cz1"
         ))

  (set! current-voice 'cz1_mbrola)
)

(proclaim_voice
 'cz1_mbrola
 '((language czech)
   (gender female)
   (dialect none)
   (description
    "This is test czech voice using mbrola.")))

(provide 'cz1_mbrola)