#!/usr/bin/perl
use Text::CSV;

# Load the state table from the file "capital".  Each line is split on
# whitespace into three fields: state name, abbreviation, and capital (the
# capital is the remainder of the line, so it may contain spaces).
# Fills three package hashes:
#   %ab    state name   -> abbreviation
#   %name  abbreviation -> state name
#   %cap   abbreviation -> capital
open my $cap_fh, "<", "capital" or die "Can't open capital: $!";
while (my $line = <$cap_fh>) {
  chomp $line;
  # Trim both ends: leading blanks would produce an empty first field, and
  # trailing blanks (or a CR) would become part of the capital's name.
  $line =~ s/^\s+//;
  $line =~ s/\s+$//;
  # A blank line would otherwise register a state with an empty abbreviation.
  next unless length $line;
  my ($state, $ab, $cap) = split /\s+/, $line, 3;
  unless (defined $cap && length $cap) {
    warn "capital line $.: expected state, abbreviation and capital; skipped\n";
    next;
  }
  $ab{$state} = $ab;
  $name{$ab} = $state;
  $cap{$ab} = $cap;
}
close $cap_fh;
# Per-state results gathered from the census file, keyed by state abbreviation.
my %state_cap_pop;   # population of the statistical area matching the capital
my %state_lg_pop;    # largest population seen among areas listing the state
my %state_lg_city;   # name of the area holding that largest population

my $csv = Text::CSV->new;
open my $census_fh, "<", "CENSUS" or die "Can't open CENSUS: $!";
{
  # Records are read as CRLF-terminated.  The separator is localized so it
  # reverts to its previous value once the file has been read.
  local $/ = "\r\n";
  while (my $record = <$census_fh>) {
    chomp $record;
    # Any bare LF left inside a record becomes a space before parsing.
    $record =~ tr/\n/ /;
    die "line $." unless $csv->parse($record);
    my @f = $csv->fields;
    my ($loc, $type, $pop) = @f[2,3,4];
#    next if $type eq "Metropolitan Division";
    # Strip everything except digits, then ignore rows with no population.
    $pop =~ tr/0-9//cd;
    next unless $pop > 0;
    # $loc is split at the comma into the area name and a hyphen-separated
    # list of state abbreviations; the row is credited to each known state.
    my ($city, $statelist) = split /,\s+/, $loc;
    for my $state (split /-/, $statelist) {
      next unless $name{$state};
      if ($pop > $state_lg_pop{$state}) {
        $state_lg_pop{$state} = $pop;
        $state_lg_city{$state} = $city;
      }
      # Literal substring test: the capital's name is data, not a pattern, so
      # characters such as "." must not act as regex metacharacters, and an
      # empty name must not match at all.
      my $capital = $cap{$state};
      if ($type =~ /M(et|ic)ropolitan Statistical Area/
          && defined $capital && length $capital
          && index($city, $capital) >= 0) {
        if (exists $state_cap_pop{$state}) {
          die "Found two populations for $cap{$state} ($state)\n";
        }
        $state_cap_pop{$state} = $pop;
      }
#      last;  #Only use first state in list
    }
  }
}
close $census_fh;

# For every state, divide the population of its largest area by the
# population of its capital's area and store the ratio in the package hash
# %quot, keyed by abbreviation.  Every state lacking a capital population is
# reported before the run is abandoned.
for my $ab (sort keys %name) {
  if ($state_cap_pop{$ab} == 0) {
    warn "Couldn't find $cap{$ab} of $ab\n";
    $STOP = 1;
    next;
  }
  $quot{$ab} = $state_lg_pop{$ab} / $state_cap_pop{$ab};
}
# Missing data is a failure: exit non-zero so a calling shell script or
# makefile does not mistake the aborted run for a successful one.
exit 1 if $STOP;

# Report on STDOUT: the table's opening tag and its header row.
print "<table align='center' cellpadding=3 cellspacing=0>\n",
      qq{<tr bgcolor='white'><th>State <th colspan=2>   Capital and <br>its  Population
<th colspan=2>Largest metropolitan area <br>and its population <th
colspan=2 bgcolor="lightblue"> Quotient
};

# Alternating row colors; $color indexes the one for the current row.
my @colors = ('#cccccc', 'pink');
my $color = 0;

# States by descending quotient, ties broken by abbreviation.
my @ranked = sort {$quot{$b} <=> $quot{$a} || $a cmp $b} keys %name;
for my $ab (@ranked) {
  # The formatted table row below is disabled; only the abbreviation and the
  # name of the state's largest area are printed.
#  printf "<tr bgcolor='$colors[$color]'><td>%2s <td>%12s <td align=right>%8s <td>%12s <td align=right>%8s <td align=right>%6d<td>%s\n",
#    $ab, $cap{$ab}, comma_int($state_cap_pop{$ab}), 
#      $state_lg_city{$ab}, comma_int($state_lg_pop{$ab}),
#        int($quot{$ab}), frac($quot{$ab});
  print join(" ", $ab, $state_lg_city{$ab}), "\n";
  $color = ($color + 1) % @colors;
}
print "</table>\n\n";

# Write one line per state to "gnuplot.dat": log10 of the capital's
# population, log10 of the quotient, and the abbreviation as a trailing
# "#" comment.  States are ordered by descending quotient; ties are broken by
# abbreviation so the file is identical from run to run.
open my $plot_fh, ">", "gnuplot.dat" or die "Can't write gnuplot.dat: $!";
for my $ab (sort {$quot{$b} <=> $quot{$a} || $a cmp $b} keys %name) {
  print {$plot_fh} log10($state_cap_pop{$ab}), " ", log10($quot{$ab}), " # $ab\n";
}
# Buffered write errors (e.g. a full disk) only surface when closing.
close $plot_fh or die "Can't close gnuplot.dat: $!";

# comma_int(NUMBER)
# Truncate NUMBER to an integer and return it as a string with a comma
# between each group of three digits, e.g. 1234567 -> "1,234,567".
# A negative sign is kept in front of the digits and never followed
# directly by a comma (-123 -> "-123", -1234 -> "-1,234").
sub comma_int {
  my $n = int(shift());
  my $sign = $n < 0 ? "-" : "";
  my $digits = abs($n);
  # Peel off the last ungrouped three digits until fewer than four remain.
  1 while $digits =~ s/^(\d+)(\d{3})/$1,$2/;
  return $sign . $digits;
}

# frac(NUMBER)
# Return the fractional part of NUMBER as a string made of a dot and two
# digits, e.g. 3.14 -> ".14".  The sign of NUMBER is ignored.
# The result is meant to sit next to int(NUMBER), so a fraction that would
# round up to 1.00 is capped at ".99" instead of wrapping around to ".00"
# (which would make 2.999 read as "2.00").
sub frac {
  my $n = shift;
  my $part = abs($n - int($n));
  my $text = sprintf("%.2f", $part);
  $text = "0.99" if $text >= 1;
  # Drop everything before the decimal point.
  $text =~ s/^.*\././;
  return $text;
}

# log10(X)
# Base-10 logarithm of X, computed from the natural logarithm.
sub log10 {
  my ($x) = @_;
  my $natural = log($x);
  return $natural / log(10);
}
