#!/usr/bin/perl
use Text::CSV;

# Load the state table from the file "capital".
# Each line is split on whitespace into at most three fields: state name,
# abbreviation, and capital (the last field keeps any internal spaces).
# Fills three package hashes:
#   %ab   : state name   -> abbreviation
#   %name : abbreviation -> state name
#   %cap  : abbreviation -> capital
# NOTE(review): a state name containing whitespace would be split across
# fields; presumably the data file avoids that -- confirm against the file.
open my $cap_fh, "<", "capital" or die "Cannot open capital: $!";
while (my $line = <$cap_fh>) {
  chomp $line;
  my ($state_name, $abbrev, $capital) = split /\s+/, $line, 3;
  # Skip blank or truncated lines: without an abbreviation the entry would be
  # stored under the key '' and later accepted as a "known" state.
  next unless defined $abbrev && length $abbrev;
  $ab{$state_name} = $abbrev;
  $name{$abbrev} = $state_name;
  $cap{$abbrev} = $capital;
}
close $cap_fh;

# Census divisions grouped by state abbreviation:
# $cities{$ab} is a list of [division name, population] pairs.
my %cities;
$STOP=1;  # NOTE(review): never read in the visible script; kept in case something else relies on it.

# Read the CSV file "CENSUS". Only rows whose fourth field is exactly
# "Metropolitan Division" and whose population (fifth field, non-digits
# stripped) is positive are kept. The third field is expected to look like
# "Name, ST" or "Name, ST-ST-..."; the row is filed under the first state
# abbreviation only.
my $csv = Text::CSV->new;
open my $census_fh, "<", "CENSUS" or die "Cannot open CENSUS: $!";
{
  # Records are terminated by CRLF. The separator is localized so the change
  # does not leak into any later reads.
  local $/ = "\r\n";
  my %warned;  # abbreviations already reported as unknown
  while (my $record = <$census_fh>) {
    chomp $record;
    # A bare LF left inside a record is flattened to a space so the record
    # is handed to the parser as a single line.
    $record =~ tr/\n/ /;
    $csv->parse($record)
      or die "CENSUS line $.: CSV parse failed: " . $csv->error_diag . "\n";
    my ($loc, $type, $pop) = ($csv->fields)[2, 3, 4];
    next unless defined $type && $type eq "Metropolitan Division";
    next unless defined $pop;
    $pop =~ tr/0-9//cd;  # drop thousands separators and any other non-digits
    next unless $pop > 0;
    my ($city, $statelist) = split /,\s+/, $loc;
    my ($state) = split /-/, (defined $statelist ? $statelist : '');
    $state = '' unless defined $state;
    unless ($name{$state}) {
      warn "Unknown state abbreviation '$state'" unless $warned{$state}++;
      next;
    }
    push @{$cities{$state}}, [$city, $pop];
  }
}
close $census_fh;

# For every state with at least two divisions, record the ratio between the
# largest and second-largest populations in %quot, and the base-10 logs of
# both populations (tagged with the state abbreviation) in @scatter.
# States with a single division get no entry in either structure.
for my $ab (keys %cities) {
  my ($largest, $runner_up) = sort { $b->[1] <=> $a->[1] } @{ $cities{$ab} };
  # Explicit guard instead of dereferencing an undefined second entry.
  next unless $runner_up;
  my ($p1, $p2) = ($largest->[1], $runner_up->[1]);
  next unless $p2 > 0;
  $quot{$ab} = $p1 / $p2;
  push @scatter, [$ab, log10($p1), log10($p2)];
}


# Emit the HTML table: one row per state, ordered by descending quotient
# (states without a quotient compare as 0 and therefore sort last), with
# alternating row background colours.
print "<table align='center' cellpadding=3 cellspacing=0>\n";
print qq{<tr bgcolor='white'><th>State <th colspan=2>  Largest city and <br>its  Population
<th colspan=2>Second-largest city <br>and its population <th
colspan=2 bgcolor="lightblue"> Quotient
};
my @colors = qw(#cccccc pink);
my $color = 0;
for my $ab (sort { ($quot{$b} || 0) <=> ($quot{$a} || 0) } keys %cities) {
  my ($largest, $runner_up) = sort { $b->[1] <=> $a->[1] } @{ $cities{$ab} };
  my ($c1, $p1) = @$largest;
  # A state may have only one division; avoid dereferencing undef.
  my ($c2, $p2) = $runner_up ? @$runner_up : ();
  if ($p2 && $p2 > 0) {
    # Round the quotient once and split it, so the integer and fractional
    # cells always agree (2.996 must read "3" and ".00", not "2" and ".00").
    my ($whole, $hundredths) = split /\./, sprintf("%.2f", $p1 / $p2);
    # The colour is passed as an argument rather than interpolated into the
    # format, so its text can never be mistaken for a printf directive.
    printf "<tr bgcolor='%s'><td>%12s <td>%24s <td align=right>%8s <td>%24s <td align=right>%8s <td align=right>%6d<td>%s\n",
      $colors[$color],
      $name{$ab},
      $c1, comma_int($p1),
      $c2, comma_int($p2),
      $whole, ".$hundredths";
  } else {
    printf "<tr bgcolor='%s'><td>%12s <td>%24s <td align=right>%8s <td colspan=4>&mdash;\n",
      $colors[$color],
      $name{$ab},
      $c1, comma_int($p1);
  }
  $color = ($color + 1) % @colors;
}
print "</table>\n\n";

# Write the scatter data: one "x y # ST" line per state, where x and y are
# the values stored in @scatter.
open my $dat_fh, ">", "gnuplot.dat" or die "Cannot write gnuplot.dat: $!";
for my $point (@scatter) {
  my ($ab, $x, $y) = @$point;
  print {$dat_fh} "$x $y # $ab\n";
}
# Buffered write errors only surface at close, so check it.
close $dat_fh or die "Cannot finish writing gnuplot.dat: $!";

# Write the gnuplot script: terminal setup, one label per state placed at
# its data point, then the plot command itself.
open my $gpl_fh, ">", "gnuplot.gpl" or die "Cannot write gnuplot.gpl: $!";
print {$gpl_fh} qq{
set output "scatterplot.pbm"
set terminal pbm color
set nokey
};
for my $point (@scatter) {
  my ($ab, $x, $y) = @$point;
  print {$gpl_fh} qq{set label " $ab" at $x, $y\n};
}
print {$gpl_fh} qq{plot "gnuplot.dat" with points\n};
close $gpl_fh or die "Cannot finish writing gnuplot.gpl: $!";

# Render the plot and convert it to JPEG. A failure is reported but not
# fatal, because the HTML table has already been printed by this point.
system("gnuplot < gnuplot.gpl && cjpeg -q 95 scatterplot.pbm > scatterplot.jpg") == 0
  or warn "Plot generation failed (gnuplot/cjpeg), wait status $?\n";



# comma_int($number)
# Truncate $number to an integer and return it as a string with a comma
# between each group of three digits, e.g. 1234567 -> "1,234,567".
# A leading minus sign is kept outside the grouping, so -123 stays "-123"
# rather than becoming "-,123".
sub comma_int {
  my $n = int(shift());
  my $sign = $n < 0 ? "-" : "";
  my $digits = abs($n);
  # Repeatedly split the last three digits off the leading run of digits
  # until that run is three digits or shorter.
  1 while $digits =~ s/^(\d+)(\d{3})/$1,$2/;
  return $sign . $digits;
}

# frac($number)
# Return the fractional part of $number formatted to two decimals with the
# leading digit and any sign removed, e.g. 2.5 -> ".50".
# The fraction is rounded, so a value such as 2.999 yields ".00".
sub frac {
  my ($value) = @_;
  my $rounded = sprintf("%.2f", $value - int($value));
  # Keep only the decimal point and what follows it.
  (my $tail = $rounded) =~ s/^.*\././;
  return $tail;
}

# log10($x)
# Base-10 logarithm of $x, computed from the natural logarithm.
sub log10 {
  my ($x) = @_;
  my $natural = log($x);
  return $natural / log(10);
}
