Perl 为 Festival 编写的 Emacspeak speech server

#!/usr/bin/perl

# EmacSpeaks Festival
#
# A Emacspeak speech server for Festival written in Perl.
# Written by Mario Lang <mlang@delysid.org>
# Enhancements by Aaron Bingham <abingham@sfu.ca>
# The skeleton was taken from speechd (and modified alot of course).

use strict;

# Configuration

my $USE_ESD = 1; 

    # If nonzero use the Enlightened Sound Daemon for managing the
    # Festival server, otherwise run Festival directly.  Overridden by
    # FESTIVAL_COMMAND

my $LOGFILE = '/tmp/festival.log'; 

    # File to use for logging debugging messages.

my $FESTIVAL_HOST = 'localhost'; 

    # Hostname where Festival server is running.  If FESTIVAL_HOST is
    # 'localhost', a Festival server process will be started
    # automatically if none is currently running.

my $FESTIVAL_PORT = '1314'; 

    # Port Festival server listens to.

my $FESTIVAL_BIN = '/usr/bin/festival'; 

    # Path to Festival executable.

my $FESTIVAL_ARGS = '--server \'(set! server_access_list (list "localhost\.localdomain"))\'';

    # Arguments for the Festival process.  The default starts a server
    # process accessible to any user on the local machine.

my $FESTIVAL_ASYNC = 1; 

    # Use Festival in asynchronous mode if nonzero, otherwise use
    # synchronous mode.  Asynchronous mode allows the client to stop
    # in-progress speech, so the result is generally more responsive
    # behavior.  However, some users have complained that asynchronous
    # mode causes too many utterances to be surpressed (e.g. letter
    # names while typing fast).  If this is a problem for you, set
    # $FESTIVAL_ASYNC = 0.

my $SPEAKER_1 = 'rab_diphone';
my $SPEAKER_2 = 'kal_diphone';

    # Name of the festival speakers to use.  Change this to voices you have
    # installed.

# Internal variables

my $FILE_STUFF_KEY = 'ft_StUfF_key'; # This indicates a new prompt from Festival
my $handle      = undef;                      # this is for the tcp connection...

my $queued_text = '';

my $FESTIVAL_COMMAND;
if ($USE_ESD) {
    $FESTIVAL_COMMAND = "esddsp $FESTIVAL_BIN $FESTIVAL_ARGS &";
} else {
    $FESTIVAL_COMMAND = "$FESTIVAL_BIN $FESTIVAL_ARGS &";
}

#includes libs for TCP socket connection to Festival
use IO::Socket;

my $err = 0;
my $pronounce_punctuation = 0;
my $emacs_in_braces = 0;
my @pq_queues = ([], []);
my %sable_params = ('speaker'=>$SPEAKER_1, 'rate'=>1, 'base'=>150, 
		    'range'=>10, 'mid'=>1);
my $festival_busy = 0;
my $emacs_lines = 0;
my $line_number = 0;

sub main {

    my $emacstext = '';
    my $festtext = '';

    &log("INIT: FESTIVAL_COMMAND = $FESTIVAL_COMMAND\n");
    # create a tcp connection to the festival server
    &connect_to_festival();
    &log("INIT: Starting loop.\n");

    # If Festival closes the connection, try to reconnect
    local $SIG{PIPE} = \&connect_to_festival;

    my $info;

    while (1) {
        my $rin;
        vec($rin, fileno(STDIN), 1) = 1;
        vec($rin, fileno($handle), 1) = 1;
        select($rin, undef, undef, undef);
        if (vec($rin, fileno(STDIN), 1)) {
	   my $buf;
	   sysread STDIN,$buf,1024;
	   if (!$buf) {
	       &log("Unexpected EOF in STDIN\n");
	       exit 1;
	   }
	   $emacstext .= $buf;
	   $emacstext = &handle_emacs_input($emacstext);	
        }
        if (vec($rin, fileno($handle), 1)) {
	   my $buf;
	   sysread $handle,$buf,1024;
	   if (!$buf) {
	       &log("Unexpected EOF in Festival socket\n");
	       exit 1;
	   }
	   $festtext .= $buf;
	   $festtext = &handle_festival_input($festtext);
        }
        &log("\$festival_busy = $festival_busy\n");
        while (!$festival_busy && !&pq_empty()) {
	   &send_command;
        }
    }
}

sub log {
    # Write a message to the logfile
    my $msg = shift;
    open(LOG, ">>$LOGFILE") or die "Cant open logfile\n";
    print LOG $msg;
    close LOG;
}

sub handle_emacs_input {
    my $emacstext = shift;
    while (1) {
	my ($line, $eol, $remainder) = split /(\n)/, $emacstext, 2;
	last if !$eol;
	$emacstext = $remainder;
	&handle_emacs_line($line . $eol);
    }
    return $emacstext;
}


sub handle_emacs_line {
    my $line = shift;
    &log("emacspeak: $line");
    $emacs_lines .= $line;
    if ($line =~ /{[^}]*\n/) {
	$emacs_in_braces = 1;
    } elsif ($line =~ /}/) {
        $emacs_in_braces = 0;
    }
    if (!$emacs_in_braces) {
        handle_emacs_command($emacs_lines);
	$emacs_lines = '';
    }
}

sub handle_festival_input {
    my $festtext = shift;
    while (1) {
	my ($line, $eol, $remainder) = split /(\n)/, $festtext, 2;
	last if !$eol;
	$festtext = $remainder;
	&handle_festival_line($line . $eol);
    }
    return $festtext;
}

sub handle_festival_line {
    my $text = shift;
    &log("festival: $text");

    if ($text =~ /^$FILE_STUFF_KEY/) {
	$festival_busy = 0;
    } elsif ($text =~ /^(OK|ER)/) {
	# the festival session is in a wierd state.  reconnect.
	&log("the festival is in a wierd state.  reconnect.\n");
	&send_to_festival("(quit)");
	&connect_to_festival();
    }
}

sub handle_emacs_command {
    my $text = shift;
    if ($text =~ /^\s*exit\n/) {
        &quit();
    } elsif ($text =~ /^\s*q\s+\{(.*)}/s) {
        &queue_speech($1);
    } elsif ($text =~ /^\s*t\s+(\d+)\s(\d+)/) {
	&tone($1, $2);
    } elsif ($text =~ /^\s*p\s+(.*)\n/) {
	&play_sound($1);
    } elsif ($text =~ /^\s*d/) {
        &flush_speech();
    } elsif ($text =~ /^\s*tts_say \{(.*)\}/) {
	&say($1);
    } elsif ($text =~ /^\s*l \{([^}])\}/) {
	&letter($1);
    } elsif ($text =~ /^\s*s/) {
	&stop();
    } elsif ($text =~ /^\s*tts_set_punctuations (\w+)/) {
	&set_punctuation($1);
    } elsif ($text =~ /^\s*tts_set_speech_rate (\d+)/) {
	&set_speech_rate($1)
    } elsif ($text =~ /^\s*tts_sync_state (\w+) (\d+) (\d+) (\d+) (\d+)/) {
	&set_punctuation($1);
	&set_speech_rate($5);
    } else {
       $err++;
       &log("$line_number: err$err: $text\n");
    }
    return 1;
}

# Actions

sub quit {
    &pq_clear();
    &stop();
    &send_to_festival("(quit)");
    exit 0;
}

sub tone {
    my $pitch = shift; # pitch in Hz
    my $duration = shift; # duration in ms

    # Run asynchronously for better responsiveness
    system("beep -f $pitch -l $duration &");
}

sub play_sound {
    my $filename = shift;
    my $url = "file://" . &url_quote($filename);
    &send_sable("<SABLE><AUDIO SRC=\"$url\"/></SABLE>");
}

sub flush_speech {
    # Flush all queued speech to the speech generator.
    if ($queued_text ne '') {
        speak($queued_text);
    }
    $queued_text = '';
}

sub letter {
    my $char = shift;
    my $content = &sgml_quote($char);
    &send_sable("<SABLE><SAYAS MODE=\"literal\">$content</SAYAS></SABLE>");
}

sub set_punctuation {
    my $mode = shift;
    if ($1 eq "all") {
	$pronounce_punctuation = 1;
    } else {
	$pronounce_punctuation = 0;
    }
}

sub set_speech_rate {
    my $dtk_rate = shift;
    # 225.0 was picked as it gives reasonable behavior.  I do not
    # know if the result is slower or faster than the DECTalk.
    $sable_params{'rate'} = $dtk_rate/225.0;
}

sub stop {
    # The queue must be cleared immediately so that any queued
    # commands recieved before the stop do not get run afterwards.    
    &pq_clear(); 
    $queued_text = '';
    if ($FESTIVAL_ASYNC) {
	&send_to_festival("(audio_mode 'shutup)");
    }
}

sub queue_speech {
    # Save speech to be sent later.
    my $text = shift;
    $queued_text .= $text;
}

sub say {
    my $text = shift;
    &speak($text);
}

sub speak {
    my $text = shift;
    if ($text =~ /\S+/) {
        foreach my $sable (&dtk_to_sable($text)) {
  	    &send_sable($sable);
        }
    } else { 
        &log("$line_number: Empty queue, nothing sent. \n"); 
    }
}

sub send_sable {
    my $sable = shift;
    $sable =~ s/"/\\"/g;
    &send_to_festival("(tts_text \"$sable\" 'sable)");
}

sub dtk_to_sable {
    my $dtk = shift;
    my @items = &dtk_parse($dtk);

    # '[*]' seems to be used as an alternative (perhaps shorter) to 
    # a space character.

    $dtk =~ s/\[\*\]/ /;

    my @sable_docs = ();
    foreach my $item (@items) {
        my $type = $item->[0];
        my $value = $item->[1];
	&log("($type, $value)\n");
        if ($type eq 'TEXT') {
            push @sable_docs, &text_to_sable($value);
        } elsif ($type eq 'COMMAND') {
	    $value =~ s/\s+//; # trim leading whitespace
            &handle_dtk_command(split /\s+/, $value);
        }
    }
    return @sable_docs;
}

sub dtk_parse {
    my $dtk = shift;

    # Return a list of (type, value) tuples, where type is either
    # COMMAND or TEXT.

    my $in_command = 0;
    my @items = ();
    my $value = '';
    for (my $i = 0; $i <= length $dtk; $i++) {
        my $ch = substr $dtk, $i, 1;
	if ($ch eq '[') {
            if (!$in_command) {
  	        if ($value ne '') {
		    my @item = ('TEXT', $value);
		    push @items, \@item;
  	        }
            } else {
                &log("ERROR: [ found while looking for ]\n");
            }
	    $in_command = 1;
   	    $value = '';
        } elsif ($ch eq ']') {
            if ($in_command) {
  	        if ($value ne '') {
		    my @item = ('COMMAND', $value);
		    push @items, \@item;
	        }
            } else {
                &log("ERROR: ] found while looking for [\n");
            }
	    $in_command = 0;
   	    $value = '';
        } else {
            $value .= $ch;
	}
    }
    if ($value ne '' && !$in_command) {
	my @item = ('TEXT', $value);
        push @items, \@item;
    }
    if ($in_command) {
        &log("ERROR: ] expected\n");
    }

    return @items;
}
	    

sub handle_dtk_command {
    my @cmdlist = @_;
    &log ("cmdlist: @cmdlist\n");
    &log ("cmdlist[0]: $cmdlist[0]\n");
    if ($cmdlist[0] =~ /:np/) {
	$sable_params{'speaker'} = $SPEAKER_1;
	$sable_params{'base'} = 100;
        $sable_params{'range'} = 10;
    } elsif ($cmdlist[0] =~ /:nh/) {
	$sable_params{'speaker'} = $SPEAKER_2;
	$sable_params{'base'} = 100;
        $sable_params{'range'} = 10;
    } elsif ($cmdlist[0] =~ /:nd/) {
	$sable_params{'speaker'} = $SPEAKER_1;
	$sable_params{'base'} = 150;
        $sable_params{'range'} = 10;
    } elsif ($cmdlist[0] =~ /:nf/) {
	$sable_params{'speaker'} = $SPEAKER_2;
	$sable_params{'base'} = 150;
        $sable_params{'range'} = 10;
    } elsif ($cmdlist[0] =~ /:nb/) {
	$sable_params{'speaker'} = $SPEAKER_1;
	$sable_params{'base'} = 200;
        $sable_params{'range'} = 10;
    } elsif ($cmdlist[0] =~ /:nu/) {
	$sable_params{'speaker'} = $SPEAKER_2;
	$sable_params{'base'} = 200;
        $sable_params{'range'} = 10;
    } elsif ($cmdlist[0] =~ /:nr/) {
	$sable_params{'speaker'} = $SPEAKER_1;
	$sable_params{'base'} = 300;
        $sable_params{'range'} = 10;
    } elsif ($cmdlist[0] =~ /:nw/) {
	$sable_params{'speaker'} = $SPEAKER_2;
	$sable_params{'base'} = 300;
        $sable_params{'range'} = 10;
    } elsif ($cmdlist[0] =~ /:nk/) {
	$sable_params{'speaker'} = $SPEAKER_1;
	$sable_params{'base'} = 400;
        $sable_params{'range'} = 10;
    } elsif ($cmdlist[0] =~ /:dv/) {
	&log ("cmdlist (dv): @cmdlist\n");
	for (my $j = 1; $j <= $#cmdlist; $j+=2) {
	    print "param: $cmdlist[$j] $cmdlist[$j+1]\n";
	    if ($cmdlist[$j] =~ /ap/) {
                # Average pitch (Hz)
		$sable_params{'base'} = $cmdlist[$j+1];
	    }
	    if ($cmdlist[$j] =~ /pr/) {
                # Pitch range.  The Dectalk parameter ranges between 0
                # -- a flat monnotone -- and 250 -- a highly animated
                # voice.
		$sable_params{'range'} = 2.0*sqrt(5.0*$cmdlist[$j+1]);
	    }
	}
    }
}

sub text_to_sable {
    my $text = shift;

    # Convert a string of plain text to a SABLE document, using the current
    # parameters.

    if ($pronounce_punctuation) {
	$text =~ s/[`]/ backquote /g;
	$text =~ s/[!]/ bang /g;
	$text =~ s/[(]/ left paren /g;
	$text =~ s/[)]/ right paren /g;
	$text =~ s/[-]/ dash /g;
	$text =~ s/[{]/ left brace /g;
	$text =~ s/[}]/ right brace /g;
	$text =~ s/[:]/ colon /g;
	$text =~ s/[;]/ semi /g;
	$text =~ s/["]/ quotes /g;
	$text =~ s/[']/ apostrophe /g;
	$text =~ s/[,]/ comma /g;
	$text =~ s/[.]/ dot /g;
	$text =~ s/[?]/ question /g;
    }
    # escape SGML-unsafe characters
    $text = sgml_quote($text);

    return <<HERE
<SABLE>
 <SPEAKER NAME="$sable_params{'speaker'}">
  <RATE SPEED="$sable_params{'rate'}">
   <PITCH BASE="$sable_params{'base'}" 
          RANGE="$sable_params{'range'}" 
          MID="$sable_params{'mid'}">
    $text
   </PITCH>
  </RATE>
 </SPEAKER>
</SABLE>
HERE
}

sub sgml_quote {
    my $text = shift;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    return $text;
}

sub url_quote {
    # XXX: incomplete!
    my $text = shift;
    $text =~ s/ /%20/;
    return $text;
}

sub send_to_festival {
    my $command = shift;
    # queue $command for sending to Festival
    &pq_push($command, 1);
}

sub send_command {
    # send a single queued command
    if ($handle) {   # Sanity checks are always nice...
	&send_direct(&pq_pop());
    } else {
	&connect_to_festival;
    }
}

sub send_direct {
    my $command = shift;
    # send $command to festival immediately, without affecting the queue
    &log("$line_number: festival> " . $command . "\n");
    print($handle $command . "\n") or die "Could not write to Festival ($!)\n";
    $festival_busy = 1;
}


sub connect_to_festival
{
  my $tries = 0;
  my $MAX_RECONNECT_TRIES = 10;
  $handle = '';
  while ($handle eq '' and $tries < $MAX_RECONNECT_TRIES)
  {
    &log("($tries) Attempting to connect to the Festival server.\n");
    if ($handle = IO::Socket::INET->new(Proto     => 'tcp',
                                        PeerAddr  => $FESTIVAL_HOST,
                                        PeerPort  => $FESTIVAL_PORT))
    {
      &log("Successfully opened connection to Festival.\n");
    } else
    {
      if ($tries)
      {
        &log("Waiting for Festival server to load -- Can't connect to port $FESTIVAL_PORT on $FESTIVAL_HOST yet ($!).\n");
      } else
      {
        if ($FESTIVAL_HOST eq 'localhost') {
          &log("Failed to connect to Festival server, attempting to load it myself.\n");
          system ($FESTIVAL_COMMAND);
        }
      }
      sleep 1;
    }
    $tries++;
  }

  if ($handle eq '') {
      die "ERROR: can't connect to Festival server!";
  }

  $handle->autoflush(1);     # so output gets there right away

  $festival_busy = 0;

  if ($FESTIVAL_ASYNC) {
      # Set festival to async mode.  We have to call send_direct here to
      # ensure that no commands from emacspeak hava a chance to get
      # executed before this one.

      &send_direct("(audio_mode 'async)");
  }

}

#
# priority queue implementation
#

sub pq_push {
    my $item = shift;
    my $pri = shift;
    push @{$pq_queues[$pri]}, $item;
}

sub pq_pop {
    for (my $i = 0; $i <= $#pq_queues; $i++) {
	if ($#{$pq_queues[$i]} >= 0) {
	    my $item = shift @{$pq_queues[$i]};
	    return $item;
        }
    }
}

sub pq_clear {
    for (my $i = 0; $i <= $#pq_queues; $i++) {
	$pq_queues[$i] = [];
    }
}

sub pq_empty {
    for (my $i = 0; $i <= $#pq_queues; $i++) {
	if ($#{$pq_queues[$i]} >= 0) {
	    return 0;
        }
    }
    return 1;
}

&main();

__END__

=head1 NAME

festival-server - Emacspeak server for the Festival speech synthesizer

=head1 SYNOPSIS

speechd 

=head1 DESCRIPTION

festival-server is a perl script doing Emacspeak server syntax
to festival conversion.
This method makes use of the SABLE markup mode of festival.

Tones are supported if you have beep installed.

=head1 OPTIONS

=head1 FILES

=over 4

=item
/usr/share/emacs/site-lisp/emacspeak/festival-server

编程技巧