#! /usr/bin/perl
####################################################################
###
### script name : eText2prompts.pl
### version: 0.1
### created by: Ken MacLean
### mail: contact@voxforge.org
### Date: 2007.3.13
### Command: perl ./eText2prompts.pl [infile-prompts] [outfile-prompts]
###
### Copyright (C) 2007 Ken MacLean
###
### This program is free software; you can redistribute it and/or
### modify it under the terms of the GNU General Public License
### as published by the Free Software Foundation; either version 2
### of the License, or (at your option) any later version.
###
### This program is distributed in the hope that it will be useful,
### but WITHOUT ANY WARRANTY; without even the implied warranty of
### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
### GNU General Public License for more details.
###
####################################################################
#
# Reads a plain-text file, splits it into sentences with
# Lingua::EN::Sentence, normalises each sentence (upper case, most
# punctuation stripped) and writes one sentence per line to the output
# file.

use strict;
use warnings;

use Lingua::EN::Sentence qw( get_sentences add_acronyms );

# Turn one sentence into a prompt line: upper-case it and strip
# punctuation.  The order of the steps is significant (the dash rules
# run before ';', '!' and '?' are removed) and is kept as in the
# original script.
#
# Takes the sentence string and returns the cleaned-up copy.
sub _sentence_to_prompt {
    my ($prompt) = @_;

    $prompt =~ tr/a-z/A-Z/;    # upper-case ASCII letters only
    $prompt =~ tr{,.":}{}d;    # remove commas, periods, double quotes, colons

    # Single quotes are deliberately kept: words like "don't" need them.
    # Single dashes inside words are kept too, because the VoxForge
    # dictionary contains words with dashes.
    $prompt =~ s/--//g;        # double dash
    $prompt =~ s/ - / /g;      # dash used as punctuation
    $prompt =~ s/ -/ /g;       # dash used as punctuation

    $prompt =~ tr{;!?}{}d;     # remove semi-colons, exclamation and question marks

    # Other cleanup.  TODO: change the prompts files directly, or add
    # the symbol to the dictionary, rather than rewriting it here.
    $prompt =~ s/&/AND/g;

    return $prompt;
}

# Exactly two arguments are required: input file and output file.
if (@ARGV != 2) {
    print "usage: inputfilename outputfilename\n";
    exit;
}

my ($inputfilename, $outputfilename) = @ARGV;

# Three-argument open with lexical handles: the file name can never be
# interpreted as an open mode, and $! reports why an open failed.
open(my $in, '<', $inputfilename)
    or die "cannot open input file '$inputfilename': $!\n";
open(my $out, '>', $outputfilename)
    or die "cannot open output file '$outputfilename': $!\n";

my @eText = <$in>;       # slurp the entire file into an array of lines
close($in);

my $eText = "@eText";    # join the lines into one scalar (space-separated)
$eText =~ s/\n//g;       # remove all line feeds from the text

# get_sentences is used here as returning an array reference; fall back
# to an empty list in case it returns undef for empty input.
my $sentences = get_sentences($eText);

foreach my $sentence (@{ $sentences || [] }) {
    my $prompt = _sentence_to_prompt($sentence);
    print {$out} "$prompt\n";
}

# Buffered write errors only surface when the handle is closed.
close($out)
    or die "error writing output file '$outputfilename': $!\n";