#!/usr/bin/perl -w

use strict;
use HTML::FromText;

# Convert the MST files in here into nice HTML files.  Retain the linebreaks
# because that's the most authentic way to read it. :)

# Speaker label => HTML colour used for that speaker's lines.
#
# Over the course of time and many fanfics this list has grown quite large.
# Annoyingly, there isn't a lot of consistency (even with fanfics by the same
# author), so the list holds many alternate spellings and permutations of the
# same name.  Lookups are exact and case-sensitive, so every variant needs its
# own entry.  Good thing Perl hashes are fast.
#
# Special keys:
#   ">", "<", "[", "("  - lines whose first non-blank character is one of
#                          these are coloured by that character alone.
#   ""                  - fallback colour used when no speaker is recognised.
#
# Keys must be unique: when a key is repeated, Perl silently keeps only the
# last value.
my %speakers = 
(
	"Mike" => "#000099",
	"MIKE" => "#000099",
	"MIKE B." => "#000099",
	"Joel" => "#000099",
	"JOEL" => "#000099",
	"Crow" => "#999900",
	"CROW" => "#999900",
	"Tom"  => "#990000",
	"TOM"  => "#990000",
	"TOM09"  => "#990000",
	"TOM02"  => "#990000",
	"TOM SERVO"  => "#990000",
	"SERVO"  => "#990000",
	"Gypsy"=> "#990099",
	"GYPSY"=> "#990099",
	"CAMBOT" => "#555500",
	"All"  => "#009900",
	"ALL"  => "#009900",
	"BOTS"  => "#009900",
	"MIKE \& BOTS"  => "#009900",
	"Bots"  => "#009900",
	"BOTH"  => "#009900",
	"Both"  => "#009900",
	"The bots"  => "#009900",
	"CROWD"  => "#009900",
	"Crowd"  => "#009900",
	"OTHERS"  => "#009900",
	"others"  => "#009900",
	"Others"  => "#009900",
	"Tom and Crow"  => "#009900",
	"TOM and CROW"  => "#009900",
	"Mike and Tom"  => "#009900",
	"Mike \& Crow"  => "#009900",
	"MIKE \& TOM"  => "#009900",
	"MIKE \& CROW"  => "#009900",
	"MIKE \& SERVO"  => "#009900",
	"CROW \& MIKE"  => "#009900",
	"CROW \& SERVO"  => "#009900",
	"CROW \& TOM"  => "#009900",
	"CROW and TOM"  => "#009900",
	"Tom \& Mike"  => "#009900",
	"Mike \& Tom"  => "#009900",
	"TOM \& MIKE"  => "#009900",
	"Crow \& Mike"  => "#009900",
	"Mike and Crow"  => "#009900",
	"MIKE and GYPSY"  => "#009900",
	"Crow and Tom"  => "#009900",
	"TOM\&CROW" => "#009900",
	"Crow \& Tom" => "#009900",
	"Tom \& Crow" => "#009900",
	"Dr. F" => "#777777",
	"Dr. F." => "#777777",
	"Dr F." => "#777777",
	"DR. F" => "#777777",
	"DR.F" => "#777777",
	"Dr.F" => "#777777",
	"Dr.F." => "#777777",
	"Dr. FORRESTER" => "#777777",
	"DR. FORRESTER" => "#777777",
	"Dr F"  => "#777777",
	"Mrs. F" => "#770077",
	"Ma F" => "#770077",
	"MRS. F" => "#770077",
	"MRS. FORRESTER" => "#770077",
	"MOM F" => "#770077",
	"Pearl" => "#770077",
	"PEARL" => "#770077",
	"PEARL and BOBO" => "#770033",
	"PearlF" => "#770077",
	">"    => "#000000",
	"<"    => "#009999",
	'['    => "#009999",
	'('    => "#009999",
	""     => "#000000",
	"Frank"=> "#777700",
	"FRANK" => "#777700",
	"SEAN" => "#777700",
	"PINHEAD" => "#007777",
	"FIDEL" => "#007777",
	"BEAR" => "#007777",
	"Peanut" => "#007777",
	"Bobo" => "#777700",
	"BOBO" => "#777700",
	"GIRL SCOUT" => "#007777",
	"VORLON" => "#770000",
	"BRIDGE" => "#770000",
	"STEPHEN" => "#770000",
	"CZAPLINSKI" => "#770000",
	"JELLICO" => "#770000",
	"Nine" => "#770000",
	"SRDS" => "#770000",
	"Stephen" => "#770000",
	"Magic Voice" => "#770000",
	"MAGIC VOICE" => "#770000",
	"Demonic Voice" => "#770000",
	"OFFSCREEN VOICE" => "#770000",
	"Speaker" => "#770000",
	"Voice" => "#770000",
	"VOICE" => "#770000",
	"QUEEN" => "#770000",
	"TV Voice" => "#770000",
	"Voice 2" => "#770000",
	"Voice2" => "#770000",
	"Voice 3" => "#770000",
	"Voice3" => "#770000",
	"Voice 4" => "#770000",
	"Voice4" => "#770000",
	"Voice 5" => "#770000",
	"Voice5" => "#770000",
	"TORGO" => "#007700",
	"B. G." => "#007700",
	"B.G." => "#007700",
	"Jesse" => "#007700",
	"DRUNK" => "#007700",
	"MAN" => "#007700",
	"MR. B RATLIFF" => "#007700",
	"RATLIFF" => "#007700",
	"Figure" => "#007700",
	"WOMAN" => "#000077",
	"Woman" => "#000077",
	"LAWYER 1" => "#000077",
	"Dr. E" => "#000077",
	"PAULINE" => "#000077",
	"NUVEENA" => "#000077",
	"MIB"  => "#000000",
	"JOHN" => "#007777",
	"TBR" => "#007777",
	"YORK" => "#007777",
	"Dr. T" => "#007777",
	"Dr. Thinker" => "#007777",
	"STEVE" => "#007777",
	"LITTLE GIRL" => "#007777",
	"STACEY" => "#007777",
	"LOUISE" => "#000077",
	"Observer" => "#000077",
	"OBSERVER" => "#000077",
	"Mime" => "#999999",
	"Suzanne" => "#000077",
	"Bridget" => "#000077",
	"probe" => "#000077",
	"CADET TODD" => "#000077",
	"DR. C" => "#770077",
	"Ortega" => "#770077",
	"ORTEGA" => "#770077",
	"LITTLE BOY" => "#770077",
	"MARRISSA" => "#FF0000",
	"Marrissa" => "#FF0000",
	"Ryan" => "#FF0055",
	"Colin" => "#00FF00",
	"Drew" => "#0055FF",
	"Dr. B" => "#0055FF",
	"Wayne" => "#00FF55",
	"CADET LISA" => "#00FF55",
# "Pitch" used to be listed twice; this later value is the one that always
# took effect, so it is the one kept.
	"Pitch" => "#883333",
	"Dr. G" => "#00FF55",
	"Dr. Gore" => "#00FF55",
	"Greg" => "#55FF00",
	"Sisko" => "#0000FF",
	"POLICE OFFICER" => "#0000FF",
	"Conan" => "#770000",
# Avoid warning messages from the headers
	"From" => "#000000",
	"Date" => "#000000",
	"Newsgroups" => "#000000",
	"Lines" => "#000000",
	"Subject" => "#000000",
	"http" => "#000000",
	"THANKS" => "#000000",
	"email" => "#000000",
	"telefax" => "#000000",
	"CREDITS" => "#000000",
	"Credits" => "#000000",
	"Disclaimers" => "#000000",
	"Disclaimer" => "#000000",
	"MiSTed by" => "#000000",
	"Misted by" => "#000000",
	"MiSTed" => "#000000",
	"MiSTING" => "#000000",
	"MiSTING BY" => "#000000",
	"MSTed" => "#000000",
	"MSTing By" => "#000000",
	"Original By" => "#000000",
	"Written By" => "#000000",
	"WARNING" => "#000000",
	"Edited by" => "#000000",
	"Star Trek" => "#000000",
	"ST" => "#000000",
);

# Set up the Text->HTML converter, with URL and e-mail address handling
# switched on.
# The options lines=>1,spaces=>1 look nicer, but bloat the page terribly.
# The constructor is called as a class method rather than with indirect
# object syntax ("new Class ..."), which perl can misparse.
my $conv = HTML::FromText->new({urls=>1,email=>1});

# Convert every file named on the command line into a colour-coded HTML page
# in the archive directory.  A file whose name does not end in ".txt", that
# cannot be read, or whose output cannot be created is reported on STDERR and
# skipped; the remaining files are still processed.
foreach my $file ( @ARGV )
{
	# Only rewrite the extension; a "txt" elsewhere in the name is left alone.
	my $outfile = $file;
	unless ( $outfile =~ s/\.txt$/.html/ )
	{
		warn "Unable to convert filename of $file, skipping\n";
		next;
	}

	# Three-argument open with lexical handles: the filename can never be
	# taken for a mode or a pipe, and a handle is closed automatically when
	# the iteration is abandoned with next.
	open(my $in, '<', $file) or do
	{
		warn "Unable to open $file, skipping: $!\n";
		next;
	};
	open(my $out, '>', "/home/jandrese/www/marrissa/$outfile") or do
	{
		warn "Unable to open $outfile for writing, skipping: $!\n";
		next;
	};

	# Human-readable title: drop the extension, then put a space before each
	# capital letter, digit and dash of the file name.
	my $shortname = $file;
	$shortname =~ s/\.txt$//;
	$shortname =~ s/([A-Z0-9])/ $1/g;
	$shortname =~ s/-/ -/g;

	print "Processing $shortname\n";

	print {$out} <<ENDOFHEADER;
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<HTML>
<HEAD>
<TITLE>
Marrissa MST Archive: $shortname
</TITLE>
</HEAD>

<BODY BGCOLOR="#FFFFFF">
<!-- Automatically generated with mst2html -->
<FONT COLOR="#000000">
<H1 ALIGN="CENTER">$shortname</H1>
ENDOFHEADER

	# Speaker key of the previous non-blank line; "" means "none yet".
	my $prev_speaker = "";

	# Test for definedness rather than truth so that a final line consisting
	# of "0" without a newline is not mistaken for end of file.
	while ( defined( my $line = <$in> ) )
	{
		chomp $line;

		# A blank line becomes a line break and ends the current speaker run.
		my $speaker;
		if ( $line =~ /^\s*(\S)/ )
		{
			$speaker = $1;
		}
		else
		{
			print {$out} "<BR>\n";
			$prev_speaker = "";
			next;
		}

		# Lines starting with one of > < [ ( are keyed by that character.
		# Anything else is either "Name:" (optionally followed by a bracketed
		# aside before the colon) or a continuation of the previous speaker.
		if ($speaker ne '>' && 
			$speaker ne '<' && 
			$speaker ne '[' &&
			$speaker ne '(' )
		{
			# Check for a speaker
			if ( $line =~ /^([\w \.&]{3,15})([\(\[<][^\)\]>]*[\]\)>])*?:/ )
			{
				# The capture cannot contain brackets, so only the
				# surrounding whitespace needs trimming.
				$speaker = $1;
				$speaker =~ s/\s*$//;
				$speaker =~ s/^\s*//;
			}
			else
			{
				$speaker = $prev_speaker;
			}
		}

		# Unknown labels are reported and fall back to the default colour.
		if ( ! exists $speakers{$speaker} )
		{
			print STDERR "Unknown Speaker: '$speaker'\n";
			print STDERR "On line: $line\n";
			$speaker = "";
		}
		
		# Start a new FONT element whenever the speaker changes, and for
		# every line that is keyed by the default "" entry.
		if ( $prev_speaker eq "" || $speaker ne $prev_speaker )
		{
			print {$out} "</FONT>";
			print {$out} "<FONT COLOR=\"$speakers{$speaker}\">";
		}
		$prev_speaker = $speaker;
		my $htmlline = $conv->parse($line);
		print {$out} "$htmlline<BR>\n";
	}

	print {$out} <<ENDOFFOOTER;
</FONT>
<HR>
<A HREF="index.html">Return</A> to the Marrissa archive index
</BODY>
</HTML>
ENDOFFOOTER

	# Buffered write errors only surface when the handle is closed.
	close($out) or warn "Error while closing $outfile: $!\n";
	close($in);
}
