#!/afs/isis/pkg/isis/bin/perl
# Last modified:  Time-stamp: <2004-09-13 16:15:58 haines>
#
# Abstract: Get NOS data for each station listed in nos_locs.txt
#   from http calls dependent on station ID.  Screen scrape data from html.
#
# Usage: % get_all_datum.pl [-d]
#
#   -d (also accepted: d, debug, DEBUG, -debug, --debug)
#       print progress and diagnostic messages to STDOUT
#
# Author: Sara Haines (2003-09)
#
# Processing:
#   (0) read in station list
#       a.) report station_id to local file
#   For each station
#   (1) Capture station tidal datum
#       a.) parse epoch
#       b.) parse tide datum, reference all datum to MLLW
#   (2) Capture bench mark sheet
#       a.) parse time period
#       b.) parse NAVD offset (check units and reference, MLLW--don't do currently)
#   (3) Capture station info data.  This is a better location to scrape to get
#       the reference bench mark id used to calculate NAVD88 height
#       above tidal reference
#       a.) parse tidal and geodetic string (has link with bench mark ID)
#   (4) report information found to local file
#       a.) report station_id
#       b.) report reference to tide datums
#       c.) report reference to NAVD88 if found and benchmark ID for this vertical datum
#       d.) report time period of averaging (if found)
#       e.) report tidal epoch
#
# Output: one file per station, ./datum/<station_id>.txt, holding
#   "name = value" lines (string values are single-quoted for MATLAB).

use strict;
use warnings;

# add libraries needed for this function
use POSIX qw(strftime);
use LWP::Simple;

# Record separator used to break fetched documents into lines.
my $delimiter = "\n";

# Station list; the first 7 characters of every line are the station id.
# my $station_list_url = "file:/opt/local/seacoos/bin/nos_locs.txt";
my $station_list_url = "file:/afs/isis.unc.edu/depts/marine/workspace/haines/seacoos/nos/get_noaa_nos/nos_locs.txt";

# if debugging requested print messages to STDOUT
# The flag must be matched as a whole word: a character class such as
# /[debug|DEBUG|d]/ would fire on any argument that merely contains one of
# those letters.
my $debug      = 0;
my $debug_flag = qr/^-{0,2}(?:debug|DEBUG|d)$/;
if (grep { /$debug_flag/ } @ARGV) {
    $debug = 1;
    # remove it from the list of input args
    @ARGV = grep { !/$debug_flag/ } @ARGV;
}

my $now = strftime("%Y:%m:%d %H:%M:%S", gmtime);
if ($debug) {
    print "\n==== Starting: $now UTC ==== Perl Version: $]\n";
}
my $starttime = time;

############################################################################
# (0) read in station list
#
my $doc = get($station_list_url);
die "Unable to read station list from $station_list_url\n"
    unless defined $doc;

# extract station_ids (splitting on a new line drops the new lines);
# blank lines are skipped so they cannot produce an empty station id
my @station_list;
foreach my $line (split(/$delimiter/, $doc)) {
    next unless $line =~ /\S/;
    push @station_list, substr($line, 0, 7);
}

my $starttime2 = time;
foreach my $station_id (@station_list) {
    if ($debug) { print "$station_id\n"; }

    my $temp_data_dir  = "./datum/";
    my $temp_data_file = "$temp_data_dir/$station_id.txt";
    open(my $outfile, '>', $temp_data_file)
        or die "Unable to write temporary data file $temp_data_file: $!\n";

    #########################################################################
    # (1) Capture station tidal datum (request is for data in units of meters)
    my $nos_url =
        "http://co-ops.nos.noaa.gov/cgi-bin/co-ops_qry_direct.cgi?stn=$station_id"
      . "&dcp=1&ssid=WL&pc=W7&datum=NULL&unit=0&bdate=20040501&edate=20040501"
      . "&date=1&shift=0&level=-4&form=0&host=&addr=152.2.92.58&data_type=acc&format=View+Data";
    my $page = fetch_page($nos_url);

    # Screen scrape http for data lines
    #   m{}s = match even if it includes newlines (test string as single line)
    #   m{}g = do match more than once until end of stream (match globally)
    #   m{}i = case insensitive match

    # parse tidal epoch; non-greedy so the capture stops at the first ")<"
    # instead of running on to the last one in the document
    my $epoch = last_capture($page =~ m{Epoch \((.*?)\)<}si);

    # match everything between station-id and < to scrape data, then
    # limit to the last line of data found for the station tide datum
    my $last_line = last_capture($page =~ m{(\Q$station_id\E.*?)<}sgi);

    # break up the line: station id followed by seven datum values
    my @all_datum = defined $last_line ? split(" ", $last_line) : ();

    my $have_tide_datum = 0;
    my ($MHHW, $MHW, $DTL, $MTL, $MSL, $MLW, $MLLW);
    if (@all_datum >= 8) {
        $have_tide_datum = 1;

        # parse out each datum value
        ($MHHW, $MHW, $DTL, $MTL, $MSL, $MLW, $MLLW) = @all_datum[1 .. 7];

        # offset all datum relative to MLLW.  The offset is copied first
        # because MLLW itself is zeroed by the same subtraction.
        # Scraped fields are coerced to numbers exactly as before; the
        # numeric warning is silenced so stray text does not flood STDERR.
        no warnings 'numeric';
        my $offset = $MLLW;
        $_ -= $offset for ($MHHW, $MHW, $DTL, $MTL, $MSL, $MLW, $MLLW);
    }

    #########################################################################
    # (2) Capture bench mark data
    # $nos_url = "http://tidesonline.nos.noaa.gov/data_read.shtml?station_info=$station_id";
    $nos_url = "http://co-ops.nos.noaa.gov/benchmarks/$station_id.html";
    $page    = fetch_page($nos_url);
    my @bench_lines = split(/$delimiter/, $page);

    # Look up each labelled line on its own.  Assigning the result of a
    # single combined grep by position would shift the values into the
    # wrong variables whenever one of the lines is missing or repeated.
    my ($period_line)      = grep { /TIME PERIOD:/ } @bench_lines;
    my ($bench_epoch_line) = grep { /TIDAL EPOCH:/ } @bench_lines;
    my ($navd_line)        = grep { /NORTH AMERICAN VERTICAL DATUM-1988/ } @bench_lines;

    if ($debug) {
        foreach my $line ($period_line, $bench_epoch_line, $navd_line) {
            print((defined $line ? $line : '') . "\n");
        }
    }

    my $period;
    if (defined $period_line) {
        $period = last_capture(
            $period_line =~ m{TIME PERIOD:\s*([\d| |\w|\-|;]*)}sgi);
    }

    my $bench_epoch;
    if (defined $bench_epoch_line) {
        $bench_epoch = last_capture(
            $bench_epoch_line =~ m{TIDAL EPOCH:\s*([\d| |\w|\-|;]*?)$}sgi);
    }

    # NAVD88 in benchmark sheets is computed relative to MLLW in meters
    # for a specific benchmark.  These sheets do not designate which benchmark.
    # Find the geodetic benchmark id in the station_info.cgi in next step.
    my $navd;
    if (defined $navd_line) {
        $navd = last_capture(
            $navd_line =~ m{\(NAVD\)\s*=([-|\d|\.| ]*)}sgi);
    }

    #########################################################################
    # (3) Capture tidal bench mark data
    #     from station info data
    $nos_url = "http://co-ops.nos.noaa.gov/cgi-bin/station_info.cgi?stn=$station_id";
    $page    = fetch_page($nos_url);

    # look for href with PID= ; the last one found is used
    my $bench_mark = last_capture($page =~ m{PID=(.*?)\">}sgi);

    #########################################################################
    # (4) report metadata
    #
    if ($have_tide_datum) {
        # print {$outfile} "station_id = " . $station_id . "\n";
        print {$outfile} "reference_to_MLLW = " . $MLLW . "\n";
        print {$outfile} "reference_to_MLW = " . $MLW . "\n";
        print {$outfile} "reference_to_MSL = " . $MSL . "\n";
        print {$outfile} "reference_to_MTL = " . $MTL . "\n";
        print {$outfile} "reference_to_DTL = " . $DTL . "\n";
        print {$outfile} "reference_to_MHW = " . $MHW . "\n";
        print {$outfile} "reference_to_MHHW = " . $MHHW . "\n";
    }

    if ($navd) {
        print {$outfile} "reference_to_NAVD88 = " . $navd . "\n";
        # single quotes are for MATLAB
        if ($bench_mark) {
            print {$outfile} "reference_to_NAVD88_benchmark_id = \'$bench_mark\'\n";
        }
        else {
            print {$outfile} "reference_to_NAVD88_benchmark_id = \'UNKNOWN\'\n";
        }
    }

    if ($have_tide_datum) {
        # single quotes are for MATLAB
        if ($period) {
            # if have tide datum period string
            print {$outfile} "reference_tide_datum_time_period = \'$period\'\n";
        }
        if ($epoch) {
            print {$outfile} "reference_tide_datum_epoch = \'$epoch\'\n";
        }
        elsif ($bench_epoch) {
            # if don't have epoch already and have epoch in the bench mark sheet
            print {$outfile} "reference_tide_datum_epoch = \'$bench_epoch\'\n";
        }
    }

    # Buffered write errors only surface when the handle is closed.
    close($outfile)
        or die "Unable to finish writing $temp_data_file: $!\n";
}    # foreach $station_id (@station_list)

my $subtime = time - $starttime2;
if ($debug) { print " ... archive time = $subtime (seconds)\n"; }

my $cummtime = time - $starttime;
if ($debug) { print "Total script time = $cummtime (seconds)\n"; }

############################################################################
# Helpers

# fetch_page($url)
#   Returns the body of the document at $url.  LWP::Simple::get returns
#   undef when the request fails; that is mapped to an empty string so the
#   scraping code sees "nothing found" rather than an undefined value.
sub fetch_page {
    my ($url) = @_;
    my $content = get($url);
    return $content if defined $content;
    print " ... no document returned for $url\n" if $debug;
    return '';
}

# last_capture(@captures)
#   Returns the final element of a list of regex captures, or undef when
#   the list is empty (i.e. the pattern did not match).
sub last_capture {
    my @captures = @_;
    return @captures ? $captures[-1] : undef;
}