#!/usr/bin/perl
# Parses a paste of BC Transit's schedule listing into links for
# http://skeena.net/bus/
#
# Usage:
#   ./parse_transit.pl schedule_paste.txt > links_snippet.html
#
# schedule_paste.txt should have lines that look like:
#   1 RICHARDSON / DOWNTOWN
#   2/2A OAK BAY / WILLOWS / DOWNTOWN
#   3 GONZALES / BEACON HILL / DOWNTOWN
#   ...
# (It's OK if the web browser put blank lines in between, or if BC Transit
# included a link twice; they'll be sorted out.)

use strict;
use warnings;

# Returns the leading integer of a schedule line, or 0 if it has none.
sub _leading_number {
    my ($line) = @_;
    return $line =~ /\A\s*(\d+)/ ? $1 : 0;
}

# Normalizes raw pasted lines: trims surrounding whitespace (including line
# endings), drops blank lines and exact duplicates, and orders the rest by
# leading route number (ties broken by string comparison).
#
# Duplicates are detected on the whole line, so distinct routes that merely
# share a leading number (e.g. "2 ..." and "2A ...") are both kept.
sub clean_schedule_lines {
    my @raw = @_;
    my %seen;
    my @kept;
    for my $line (@raw) {
        next unless defined $line;
        $line =~ s/\A\s+//;
        $line =~ s/\s+\z//;
        next if $line eq '';
        next if $seen{$line}++;
        push @kept, $line;
    }
    my @sorted = sort {
        _leading_number($a) <=> _leading_number($b) or $a cmp $b
    } @kept;
    return @sorted;
}

# Turns raw schedule lines into the list of output lines, one per
# (route number, route name) pair, each terminated by a newline.
#
# Parameters: the raw lines of the paste (line endings optional).
# Returns:    a list of strings of the form "ROUTE_NUM NAME\n".
sub route_link_lines {
    my @pending = clean_schedule_lines(@_);
    my @output;

    # A plain foreach cannot be used: combined route numbers push new
    # entries back onto the front of the queue (see below).  Loop on the
    # element count so the final line is processed and empty input ends.
    while (@pending) {
        my $line = shift @pending;

        # The route number is the first whitespace-separated field; the
        # rest of the line holds the route names.
        my ($route_num, $route_names) = split ' ', $line, 2;
        next unless defined $route_names;    # a number with no names

        # Special handling: a '/' in the route number (so far only the
        # 2/2A route) means several variants share the same names.  Queue
        # one copy per variant, in their written order, and start over.
        if ($route_num =~ m{/}) {
            unshift @pending,
                map  { "$_ $route_names" }
                grep { length } split m{/}, $route_num;
            next;
        }

        # One output line per route name.
        my $direction = 0;
        for my $name (split /\s+\/\s+/, $route_names) {
            # BC Transit's link to the schedule page does not include
            # non-numeric characters of the route number (e.g. the 2A's
            # URL looks like "...route=2:0..."), so strip them out.
            (my $link_route_num = $route_num) =~ tr/0-9//cd;

            # NOTE(review): $link_route_num and $direction are computed but
            # not used in the emitted text; presumably the link markup that
            # referenced them was lost from this line -- confirm and restore.
            push @output, "$route_num $name\n";

            # All route names after the first one are the other direction.
            $direction = 1;
        }
    }
    return @output;
}

# Script entry point: reads the paste file named by the first argument and
# prints the generated lines to standard output.  Dies with a usage message
# when no file is given, or with the OS error when it cannot be opened.
sub main {
    my ($schedule_paste) = @_;
    die "usage: $0 schedule_paste.txt > links_snippet.html\n"
        unless defined $schedule_paste;

    # Read the file directly instead of passing its name through a shell.
    open my $fh, '<', $schedule_paste
        or die "cannot open '$schedule_paste': $!\n";
    my @raw = <$fh>;
    close $fh;

    print route_link_lines(@raw);
    return 0;
}

exit main(@ARGV) unless caller;

1;