#!/bin/perl # zip_latlong.pl # Perl implementation 4 Nov 95 by Dan Kegel (dank@alumni.caltech.edu) # Available as ftp://ftp.alumni.caltech.edu/pub/dank/zip_latlong.pl # # Input: unzipped state file, e.g. AK.ZSC. # Output: zip code weighted latitude & longitude. # Example: gunzip < ak.zip | perl zip_latlong.pl > ak.out # # From http://cedr.lbl.gov/pdocs/feas/pop/zip_latlong.html: #--- # Obtain a sorted list sortzip[j] of unique zip codes that are present # in the input file ak.tmp. # Define vectors nrecs[j], sumpop[j], sumlat[j] and sumlong[j]. # In each case j=1,2,...n where n is the number of unique zip codes. # Initialize nrecs, sumpop, sumlat, and sumlong to zero. # For each line i in ak.tmp, do the following: # 1. read a line containing zip[i], pop[i], lat[i], and long[i]. # 2. rlat=lat[i]-lat[1]; rlong[i]-long[1]. # To avoid rounding errors, latitude and longitude values relative # to those of the first record are used. # 3. find the value of j such that zip[i]==sortzip[j]. # 4. nrecs[j]=nrecs[j]+1 # 5. sumpop[j]=sumpop[j]+pop[i] # 6. sumlat[j]=sumlat[j]+pop[i]*rlat[i] # 7. sumlong[j]=sumlong[j]+pop[i]*rlong[i] # For each value of j from 1 to n, do the following: # 1. if sumpop[j].gt.0 {sumlat[j]=sumlat[j]/sumpop[j] + lat[1]} # 2. if sumpop[j].gt.0 {sumlong[j]=sumlong[j]/sumpop[j] + long[1]} # 3. to an output file ak.out, write a line containing # sortzip[j], nrecs[j], sumpop[j], sumlat[j], and sumlat[j]. # The output file ak.out contains five columns: # zip code, number of blocks, population, latitude # (population-weighted), and longitude(population-weighted). #--- # Data looks like this: #"POLID","ZIP","MSACMSA","PMSA","URBAREA","COUSUBCE","PLACE","REG","DIV","CD","CNTYSC","COUSUBCC","COUSUBSC","MSASC","PLACECC","PLACEDC","PLACESC","UASC","CNTY","CCCE","PLACEFP","COUSUBFP","FUNCSTAT","URBANRUR","XCI","SAC9","ANRC","AIANACE","AIANAFP","AIANACC","ARTLI","POP","HUS","AREALAND","XLONG","YLAT" #"110010001.00101","20007","8840","9999","8840","100","0005","3","5","98","20","C5","20","22","C5","3","20","22","001","9","50000","50000","S","1","9","3","99","9999","99999","99","9",291,218,0.568,-77.063613,38.915733 #"110010001.00102","20007","8840","9999","8840","100","0005","3","5","98","20","C5","20","22","C5","3","20","22","001","9","50000","50000","S","1","9","3","99","9999","99999","99","9",35,12,0.028,-77.065762,38.91405 # Skip field names $legend = <>; while (<>) { chop; # Extract fields 2(ZIP), 32(POP), 35(XLONG), and 36(YLAT). s/"//g; @fields = split(/,/); ($zip, $pop, $xlong, $ylat) = ($fields[1], $fields[31], $fields[34], $fields[35]); if ($xlong0 eq "") { $xlong0 = $xlong; $ylat0 = $ylat; print STDERR "First record: zip $zip, long $xlong, lat $ylat\n"; } $xlong -= $xlong0; $ylat -= $ylat0; $n{$zip}++; $pop{$zip} += $pop; $xlong{$zip} += $pop * $xlong; $ylat{$zip} += $pop * $ylat; } foreach(sort(keys(%n))) { $pop = $pop{$_}; if ($pop == 0) { warn "bug: zero pop for zip $_\n"; next; } $xlong = $xlong{$_} / $pop + $xlong0; $ylat = $ylat{$_} / $pop + $ylat0; print "$_ $n{$_} $pop $xlong $ylat\n"; }