#!/usr/bin/perl -w
use strict;
use HTML::FromText;
# Convert the MST files in here into nice HTML files. Retain the linebreaks
# because that's the most authentic way to read it. :)
# Speaker name => text colour.
#
# Over many fanfics this list has grown quite large, and there is little
# consistency in how speakers are written (even within one author's work), so
# it carries a lot of alternate spellings and permutations.
#
# The table below is an ordered list of [ colour => names... ] rows that is
# flattened into %speakers.  Every name must appear exactly once: a name that
# is listed twice silently takes the colour of the later row, so duplicates
# are reported when the table is loaded.
my %speakers;
{
    my @palette = (
        [ '#000099' => 'Mike', 'MIKE', 'MIKE B.', 'Joel', 'JOEL' ],
        [ '#999900' => 'Crow', 'CROW' ],
        [ '#990000' => 'Tom', 'TOM', 'TOM09', 'TOM02', 'TOM SERVO', 'SERVO' ],
        [ '#990099' => 'Gypsy', 'GYPSY' ],
        [ '#555500' => 'CAMBOT' ],

        # Several speakers at once.
        [   '#009900' =>
                'All', 'ALL', 'BOTS', 'MIKE & BOTS', 'Bots', 'BOTH', 'Both',
            'The bots', 'CROWD', 'Crowd', 'OTHERS', 'others', 'Others',
            'Tom and Crow', 'TOM and CROW', 'Mike and Tom', 'Mike & Crow',
            'MIKE & TOM', 'MIKE & CROW', 'MIKE & SERVO', 'CROW & MIKE',
            'CROW & SERVO', 'CROW & TOM', 'CROW and TOM', 'Tom & Mike',
            'Mike & Tom', 'TOM & MIKE', 'Crow & Mike', 'Mike and Crow',
            'MIKE and GYPSY', 'Crow and Tom', 'TOM&CROW', 'Crow & Tom',
            'Tom & Crow',
        ],

        [   '#777777' =>
                'Dr. F', 'Dr. F.', 'Dr F.', 'DR. F', 'DR.F', 'Dr.F', 'Dr.F.',
            'Dr. FORRESTER', 'DR. FORRESTER', 'Dr F',
        ],
        [   '#770077' =>
                'Mrs. F', 'Ma F', 'MRS. F', 'MRS. FORRESTER', 'MOM F',
            'Pearl', 'PEARL',
        ],
        [ '#770033' => 'PEARL and BOBO' ],
        [ '#770077' => 'PearlF' ],

        # Single-character keys: lines starting with one of these characters
        # are looked up by that character rather than by a speaker name.
        [ '#000000' => '>' ],
        [ '#009999' => '<', '[', '(' ],

        # The empty name is the fallback used for unknown speakers.
        [ '#000000' => '' ],

        [ '#777700' => 'Frank', 'FRANK', 'SEAN' ],
        [ '#007777' => 'PINHEAD', 'FIDEL', 'BEAR', 'Peanut' ],
        [ '#777700' => 'Bobo', 'BOBO' ],
        [ '#007777' => 'GIRL SCOUT' ],
        [   '#770000' =>
                'VORLON', 'BRIDGE', 'STEPHEN', 'CZAPLINSKI', 'JELLICO',
            'Nine', 'SRDS', 'Stephen', 'Magic Voice', 'MAGIC VOICE',
            'Demonic Voice', 'OFFSCREEN VOICE', 'Speaker', 'Voice', 'VOICE',
            'QUEEN', 'TV Voice', 'Voice 2', 'Voice2', 'Voice 3', 'Voice3',
            'Voice 4', 'Voice4', 'Voice 5', 'Voice5',
        ],
        [   '#007700' =>
                'TORGO', 'B. G.', 'B.G.', 'Jesse', 'DRUNK', 'MAN',
            'MR. B RATLIFF', 'RATLIFF', 'Figure',
        ],

        # "Pitch" used to be listed here as well, but the later '#883333'
        # entry always overrode it, so only that one is kept.
        [   '#000077' =>
                'WOMAN', 'Woman', 'LAWYER 1', 'Dr. E', 'PAULINE', 'NUVEENA',
        ],
        [ '#000000' => 'MIB' ],
        [   '#007777' =>
                'JOHN', 'TBR', 'YORK', 'Dr. T', 'Dr. Thinker', 'STEVE',
            'LITTLE GIRL', 'STACEY',
        ],
        [ '#000077' => 'LOUISE', 'Observer', 'OBSERVER' ],
        [ '#999999' => 'Mime' ],
        [ '#000077' => 'Suzanne', 'Bridget', 'probe', 'CADET TODD' ],
        [ '#770077' => 'DR. C', 'Ortega', 'ORTEGA', 'LITTLE BOY' ],
        [ '#FF0000' => 'MARRISSA', 'Marrissa' ],
        [ '#FF0055' => 'Ryan' ],
        [ '#00FF00' => 'Colin' ],
        [ '#0055FF' => 'Drew', 'Dr. B' ],
        [ '#00FF55' => 'Wayne', 'CADET LISA' ],
        [ '#883333' => 'Pitch' ],
        [ '#00FF55' => 'Dr. G', 'Dr. Gore' ],
        [ '#55FF00' => 'Greg' ],
        [ '#0000FF' => 'Sisko', 'POLICE OFFICER' ],
        [ '#770000' => 'Conan' ],

        # Avoid warning messages from the headers: these are not speakers,
        # just words that commonly appear before a colon.
        [   '#000000' =>
                'From', 'Date', 'Newsgroups', 'Lines', 'Subject', 'http',
            'THANKS', 'email', 'telefax', 'CREDITS', 'Credits',
            'Disclaimers', 'Disclaimer', 'MiSTed by', 'Misted by', 'MiSTed',
            'MiSTING', 'MiSTING BY', 'MSTed', 'MSTing By', 'Original By',
            'Written By', 'WARNING', 'Edited by', 'Star Trek', 'ST',
        ],
    );

    for my $row (@palette) {
        my ( $color, @names ) = @$row;
        for my $name (@names) {
            warn "Speaker '$name' is listed more than once\n"
                if exists $speakers{$name};
            $speakers{$name} = $color;
        }
    }
}
# Set up the Text->HTML converter with the "urls" and "email" options on.
# The options lines=>1,spaces=>1 look nicer, but bloat the page terribly.
# Direct method-call syntax is used instead of "new Class(...)" because the
# indirect form is parsed heuristically and can bind to the wrong thing.
my $conv = HTML::FromText->new( { urls => 1, email => 1 } );
# Convert every file named on the command line.
#
# For each NAME.txt a NAME.html is written below $outdir.  Each input line
# becomes one output line ending in <br>, wrapped in a coloured <span> chosen
# from %speakers; consecutive lines by the same speaker share one span.  A
# file that cannot be renamed, read or written is reported on STDERR and
# skipped, so the remaining files are still processed.
#
# NOTE(review): the literal HTML used for the page header, footer and the
# per-line wrapper is a best-effort choice (the target of the index link in
# particular) -- confirm it against the published archive pages.
my $outdir = "/home/jandrese/www/marrissa";

FILE:
foreach my $file (@ARGV) {

    # Only a trailing ".txt" is rewritten; anything else is not ours.
    my $outfile = $file;
    unless ( $outfile =~ s/\.txt$/.html/ ) {
        warn "Unable to convert filename of $file, skipping\n";
        next FILE;
    }

    my $in;
    unless ( open( $in, '<', $file ) ) {
        warn "Unable to open $file, skipping: $!\n";
        next FILE;
    }

    my $out;
    unless ( open( $out, '>', "$outdir/$outfile" ) ) {
        warn "Unable to open $outfile for writing, skipping: $!\n";
        close($in);
        next FILE;
    }

    # Build a readable title from the file name: drop the extension, then
    # put a space in front of every capital letter, digit and hyphen.
    my $shortname = $file;
    $shortname =~ s/\.txt$//;
    $shortname =~ s/([A-Z0-9])/ $1/g;
    $shortname =~ s/-/ -/g;
    print "Processing $shortname\n";

    print {$out} <<"ENDOFHEADER";
<html>
<head>
<title>Marrissa MST Archive: $shortname</title>
</head>
<body>
<h1>$shortname</h1>
ENDOFHEADER

    my $prevstart = "";    # speaker key of the previous non-blank line
    my $span_open = 0;     # true while a colour <span> is waiting to be closed

    # Loop on defined() so that a last line of "0" without a newline is
    # still converted instead of ending the file early.
    LINE:
    while ( defined( my $line = <$in> ) ) {
        chomp $line;

        my $speaker = _speaker_key( $line, $prevstart );

        # A blank line ends the current speaker's run.
        unless ( defined $speaker ) {
            print {$out} "<br>\n";
            $prevstart = "";
            next LINE;
        }

        unless ( exists $speakers{$speaker} ) {
            print STDERR "Unknown Speaker: '$speaker'\n";
            print STDERR "On line: $line\n";
            $speaker = "";
        }

        # Start a new colour span after a blank line or a change of speaker.
        if ( $prevstart eq "" || $speaker ne $prevstart ) {
            print {$out} "</span>" if $span_open;
            print {$out} '<span style="color: ' . $speakers{$speaker} . '">';
            $span_open = 1;
        }
        $prevstart = $speaker;

        my $htmlline = $conv->parse($line);
        print {$out} "$htmlline<br>\n";
    }
    print {$out} "</span>\n" if $span_open;

    print {$out} <<"ENDOFFOOTER";
<hr>
<a href="index.html">Return to the Marrissa archive index</a>
</body>
</html>
ENDOFFOOTER

    # Buffered write errors only surface at close, so check it.
    close($out) or warn "Error while writing $outfile: $!\n";
    close($in);
}

# Work out which %speakers key applies to one line of input.
#
#   $line     - the input line, already chomped
#   $previous - the key used for the previous line ("" if there was none)
#
# Returns nothing (undef in scalar context) for a blank line.  A line whose
# first non-blank character is >, <, [ or ( is keyed by that character.  A
# line that starts with a 3-15 character name (word characters, spaces, dots
# and ampersands), optionally followed by bracketed remarks, and then a
# colon, is keyed by the name with surrounding whitespace removed.  Any other
# line is treated as a continuation and keeps $previous.
sub _speaker_key {
    my ( $line, $previous ) = @_;

    my ($first) = $line =~ /^\s*(\S)/;
    return unless defined $first;

    return $first
        if $first eq '>' || $first eq '<' || $first eq '[' || $first eq '(';

    if ( $line =~ /^([\w \.&]{3,15})([\(\[<][^\)\]>]*[\]\)>])*?:/ ) {
        my $name = $1;

        # The capture cannot contain brackets, so only whitespace needs
        # trimming here.
        $name =~ s/\s*$//;
        $name =~ s/^\s*//;
        return $name;
    }

    return $previous;
}