#!/usr/bin/tclsh
## $Id$
## $Author$
puts "Loading Dortmunder ... pint = 1"
package require tdom
package require http
package require mysqltcl
package require csv
package require inifile
proc setvar { var value } {
upvar 1 $var v
set v $value
}
proc setup_values { line } {
set newline [list]
foreach item [ string trim [ ::csv::split $line ","]] {
regsub -all {"} $item "" item
if { [string length $item] == 0 || $item == "NULL" } {
lappend newline ''
} else {
if {[string is digit $item]} {
lappend newline $item
} else {
regsub -all {'} $item "''" item
lappend newline "'$item'"
}
}
}
return [::csv::join $newline ","]
}
proc get_dbh { inifile } {
if {[catch {set inifp [ini::open $inifile ]} errmsg]} {
puts stderr "Error opening inifile $inifile: $errmsg"
exit 1
}
set section "database"; array set db {};
foreach key [ini::keys $inifp $section] {
set db($key) [ini::value $inifp $section $key]
}
return [mysqlconnect -host $db(host) -user $db(user) -password $db(password) -db $db(db)]
return $db
}
proc mysql_result { db sql } {
## puts "DEBUG: $sql"
if {[catch {mysqlexec $db $sql} errmsg]} {
puts "Error inserting into games: $errmsg"
puts $sql
return
}
}
proc get_fields { node fields } {
set line [list]
foreach field $fields {
set value [$node getAttribute $field "NULL"]
if { $value == "" } { set value "NULL" }
if { $field == "dob" } {
set info [split $value "/"]
set value "[lindex $info 2]-[lindex $info 1]-[lindex $info 0]"
}
lappend line $value
}
return $line
}
puts "Dortmunder still loading, pint refilled .. pint = 1"
proc get_players_data { db gameid dir} {
set player_fields [list team id pos first_name last_name jersey_number height weight bats throws dob]
set players_dir [file join $dir "batters"]
if {![catch {glob [file join $players_dir "*.xml"]} files]} {
foreach player $files {
if {[file exists $player]} {
set filename $player
set xml [readfile $filename]
if {![catch {set doc [dom parse $xml]}]} {
set root [$doc documentElement]
set name [$root nodeName]
set line [list]; lappend line $gameid
foreach value [get_fields $root $player_fields] {
lappend line $value
}
mysql_result $db "insert into gameday_players values ([setup_values [::csv::join $line ","]]);"
}
}
}
}
}
proc get_gameday_game_info { db gameid lg boxscore games } {
# we need some information from the boxscore
set xml $boxscore
set doc [dom parse $xml]
set root [$doc documentElement]
set name [$root nodeName]
set boxscore [$root selectNode /boxscore]
set game_id [$boxscore getAttribute game_id ""]
set game_date [$boxscore getAttribute date ""]
set home_team [$boxscore getAttribute home_team_code ""]
set away_team [$boxscore getAttribute away_team_code ""]
set away_id [$boxscore getAttribute away_id ""]
set home_id [$boxscore getAttribute home_id ""]
set game_date [join [lrange [split $game_id "/"] 0 2] "/"]
# set type [string range $gameid 18 20]
puts "Game_id: $gameid, Date: $game_date, $lg, $away_team ($away_id) @ $home_team ($home_id)"
set linescore [$root selectNode /boxscore/linescore]
set away_runs [$linescore getAttribute away_team_runs ""]
set home_runs [$linescore getAttribute home_team_runs ""]
set away_hits [$linescore getAttribute away_team_hits ""]
set home_hits [$linescore getAttribute home_team_hits ""]
set away_errors [$linescore getAttribute away_team_errors ""]
set home_errors [$linescore getAttribute home_team_errors ""]
# now we need the game information
if { $games == "" } {
set games ""
} else {
set xml $games
}
set game_fields [list type local_game_time gameday_sw]
set team_fields [list type code abbrev id name w l league]
set stadium_fields [list id name location]
if {![catch {set doc [dom parse $xml]}]} {
set root [$doc documentElement]
set name [$root nodeName]
set line $gameid
set game_type [$root getAttribute type ""]
set local_game_time [$root getAttribute local_game_time ""]
set gameday_sw [$root getAttribute gameday_sw ""]
set node [$root selectNode home]
foreach node [$root selectNode *] {
set name [$node nodeName]
if { $name eq "team" } {
set type [$node getAttribute type "NULL"]
if { $type ne "NULL" } {
set ${type}_team [$node getAttribute code "NULL"]
set ${type}_id [$node getAttribute id "NULL"]
set ${type}_name [$node getAttribute name "NULL"]
set ${type}_w [$node getAttribute w "NULL"]
set ${type}_l [$node getAttribute l "NULL"]
set ${type}_lg [$node getAttribute league "NULL"]
}
}
}
if { $name eq "stadium" } {
set stadium_id [$node getAttribute id "NULL"]
set stadium_name [$node getAttribute name "NULL"]
set stadium_loc [$node getAttribute location "NULL"]
}
}
set values [list $gameid $game_type $lg $local_game_time $gameday_sw $home_team $home_id $home_name $home_w $home_l $home_lg $away_team $away_id $away_name $away_w $away_l $away_lg $home_runs $home_hits $home_errors $away_runs $away_hits $away_errors $stadium_id $stadium_name $stadium_loc]
mysql_result $db "insert into gameday_game_info values ( [setup_values [::csv::join $values ","]]);"
}
proc get_boxscore_data { db xml gamedayid } {
set doc [dom parse $xml]
set root [$doc documentElement]
set name [$root nodeName]
set boxscore [$root selectNode /boxscore]
set game_id [$boxscore getAttribute game_id "NULL"]
set game_date [$boxscore getAttribute date "NULL"]
set home_team [$boxscore getAttribute home_team_code "NULL"]
set away_team [$boxscore getAttribute away_team_code "NULL"]
set away_id [$boxscore getAttribute away_id "NULL"]
set home_id [$boxscore getAttribute home_id "NULL"]
set game_date [join [lrange [split $game_id "/"] 0 2] "/"]
set type [string range $gamedayid 18 20]
set linescore [$root selectNode /boxscore/linescore]
set away_team_runs [$linescore getAttribute away_team_runs "NULL"]
set home_team_runs [$linescore getAttribute home_team_runs "NULL"]
set away_team_hits [$linescore getAttribute away_team_hits "NULL"]
set home_team_hits [$linescore getAttribute home_team_hits "NULL"]
set away_team_errors [$linescore getAttribute away_team_errors "NULL"]
set home_team_errors [$linescore getAttribute home_team_errors "NULL"]
foreach inning [$linescore selectNode inning_line_score] {
set away_runs [$inning getAttribute away "-1"]
set home_runs [$inning getAttribute home "-1"]
set inning [$inning getAttribute inning]
if {$away_runs eq "" } { set away_runs "-2" }
if {$home_runs eq "" } { set home_runs "-2" }
if {$away_runs eq "x"} { set away_runs "-1" }
if {$home_runs eq "x"} { set home_runs "-1" }
set values [::csv::join [list $gamedayid $inning $home_runs $away_runs] ","]
mysql_result $db "insert into gameday_linescore values ( [setup_values $values]);"
}
set batting [$root selectNode /boxscore/batting]
foreach batting_node $batting {
set type [$batting_node getAttribute team_flag "NULL"]
set batting_ids [$batting_node selectNode batter]
foreach batter $batting_ids {
set player_id [$batter getAttribute id "NULL"]
set pos [$batter getAttribute pos "NULL"]
set bo [$batter getAttribute bo "000" ]
set h [$batter getAttribute h "NULL"]
set po [$batter getAttribute po "NULL"]
set hr [$batter getAttribute hr "NULL"]
set bb [$batter getAttribute bb "NULL"]
set so [$batter getAttribute so "NULL"]
set rbi [$batter getAttribute rbi "NULL"]
set ab [$batter getAttribute ab "NULL"]
set r [$batter getAttribute r "NULL"]
set t [$batter getAttribute t "NULL"]
set d [$batter getAttribute d "NULL"]
set lob [$batter getAttribute lob "NULL"]
set a [$batter getAttribute a "NULL"]
set e [$batter getAttribute e "0"]
set sb [$batter getAttribute sb "0"]
set cs [$batter getAttribute cs "0"]
set pb [$batter getAttribute pb "0"]
switch -- $type {
away { set teamid $away_id }
home { set teamid $home_id }
default { puts "ERROR: away/home not defined in batting" }
}
set values [::csv::join [list $player_id $teamid $gamedayid $pos $h $hr $bb $so $rbi $ab $r $t $d $lob $sb $cs $bo] ","]
mysql_result $db "insert into gameday_batting values ( [setup_values $values]);"
set values [::csv::join [list $player_id $teamid $gamedayid $pos $po $a $e $pb] ","]
mysql_result $db "insert into gameday_fielding values ( [setup_values $values]);"
}
}
set pitching [$root selectNode /boxscore/pitching]
foreach pitching_node $pitching {
set type [$pitching_node getAttribute team_flag]
set pitching_ids [$pitching_node selectNode pitcher]
foreach pitcher $pitching_ids {
set player_id [$pitcher getAttribute id "NULL"]
set pos [$pitcher getAttribute pos "NULL"]
set out [$pitcher getAttribute out "NULL"]
set bf [$pitcher getAttribute bf "NULL"]
set hr [$pitcher getAttribute hr "NULL"]
set bb [$pitcher getAttribute bb "NULL"]
set so [$pitcher getAttribute so "NULL"]
set er [$pitcher getAttribute er "NULL"]
set r [$pitcher getAttribute r "NULL"]
set h [$pitcher getAttribute h "NULL"]
set note [$pitcher getAttribute note "NULL"]
set wins 0; set losses 0; set saves 0
set holds 0; set hopps 0; set sopps 0
if { $note ne "none" } {
set s [split $note ')']
foreach part $s {
if [regexp {^\((.*?),} $part -> a] {
switch -exact -- $a {
L { set losses 1 }
W { set wins 1 }
H { set hopps 1; set holds 1 }
S { set saves 1; set sopps 1 }
BH { set hopps 1 }
BS { set sopps 1 }
default { puts "ERROR, pitcher: value: $a note: $note" }
}
}
}
}
switch -- $type {
away { set teamid $away_id }
home { set teamid $home_id }
default { puts "ERROR: away/home not defined in batting" }
}
mysql_result $db "insert into gameday_pitching values ( [setup_values [::csv::join [list $player_id $teamid $gamedayid $pos $out $bf $hr $bb $so $er $r $h $wins $losses $holds $hopps $saves $sopps] ","]]);"
}
}
}
proc get_player_data { db xml url gamedayid } {
set doc [dom parse $xml]
set root [$doc documentElement]
set name [$root nodeName]
set umpires [$root selectNode umpires]
if { $umpires != "" } {
set ump_fields [list name position]
foreach umpire [$umpires selectNode umpire] {
set line [list $gamedayid]
foreach value [get_fields $umpire $ump_fields] {
lappend line $value
}
mysql_result $db "insert into gameday_umps values ( [setup_values [::csv::join $line ","]] );"
}
}
set game [$root selectNode /game]
foreach team [$game selectNode team] {
set teamid [string tolower [$team getAttribute id]]
foreach player [$team selectNode player] {
set id [$player getAttribute id "NULL"]
set first [$player getAttribute first "NULL"]
set last [$player getAttribute last "NULL"]
set num [$player getAttribute num "999"]
set boxname [$player getAttribute boxname "NULL"]
set b [$player getAttribute rl "X"]
if {$num eq ""} { set num 999 }
if {[string range $num 0 0] == "-"} { set num 999 }
set values [::csv::join [list $gamedayid $teamid $id $first $last $num $boxname $b] ","]
mysql_result $db "insert into gameday_rosters values ( [setup_values $values] );"
}
set coach_fields [list id first last num position]
foreach coach [$team selectNode coach] {
set line [list $gamedayid $teamid]
foreach value [get_fields $coach $coach_fields] {
lappend line $value
}
mysql_result $db "insert into gameday_coaches values ( [setup_values [::csv::join $line ","]] );"
}
}
}
proc get_hitting_data { db xml gamedayid } {
set hip_fields [list x y inning batter pitcher type des team]
set doc [dom parse $xml]
set root [$doc documentElement]
set name [$root nodeName]
set hitchart [$root selectNode /hitchart ]
set this_name [$hitchart nodeName]
set hips [$hitchart selectNode *]
foreach hip $hips {
set this_name [$hip nodeName]
set fields [set ${this_name}_fields]
set line $gamedayid
foreach value [get_fields $hip $fields] {
lappend line $value
}
mysql_result $db "insert into gameday_hit_locations values ( [setup_values [::csv::join $line ","]] );"
}
}
proc getnodes { node level } {
set t [$node nodeName]
# get attrs for the current node
foreach b [$node attributes *] {
puts "$level:[$node nodeName]:$b:[$node getAttribute $b]"
}
# get each node
set n [$node selectNode *]
if {[llength $n]>0} {
set level "$level:$t"
foreach a $n {
getnodes $a $level
}
}
}
proc Q { value } {
if { $value != "NULL" } {
if {[string is ascii $value]} {
set value [QQ $value]
}
}
return $value
}
proc P { line } {
set new_line [list]
foreach item $line {
lappend new_line [Q $item]
}
return $new_line
}
proc QQ { s } {
return "\'[string map { ' '' } $s]\'"
}
proc pbp_data { db dir gameid } {
set atbat_fields [list num b batter des event o pitcher s score stand]
set pitch_fields [list id ax ay az break_angle break_length break_x break_y break_z des end_speed on_1b on_2b on_3b pfx_x pfx_z px pz start_speed sz_bot sz_top type vx0 vy0 vz0 x x0 y y0 z0 pitch_type type_confidence]
set runner_fields [list id earned end event rbi score start]
set action_fields [list pitch b des event o player s score]
set po_fields [list des]
set inning_dir [file join $dir "inning"]
if {![catch {glob [file join $inning_dir "*.xml"]} inning_files]} {
foreach inning $inning_files {
if {[file exists $inning]} {
set filename $inning
set xml [readfile $filename]
if {![catch {set doc [dom parse $xml]}]} {
set root [$doc documentElement]
set name [$root nodeName]
if { $name != "inning" } { continue }
set num [$root getAttribute num]
foreach thishalf [$root childNodes] {
if {[$thishalf nodeName] == "top"} {
set half 0
} else {
set half 1
}
set items [list action atbat]
set nodes [$thishalf selectNode *]
foreach node $nodes {
set this_name [$node nodeName]
set fields [ set ${this_name}_fields]
set line [list $gameid $num $half]
foreach value [get_fields $node $fields] {
lappend line $value
}
mysql_result $db "INSERT INTO gameday_${this_name} VALUES ( [setup_values [::csv::join $line ","]] );"
set atbat_num [$node getAttribute num -1]
set pitch_num 0
foreach c [$node selectNode *] {
set this_name [$c nodeName]
if {$this_name == "pitch"} {
incr pitch_num
}
set line [list $gameid $num $half $atbat_num $pitch_num]
set fields [ set ${this_name}_fields]
foreach value [get_fields $c $fields] {
lappend line $value
}
mysql_result $db "INSERT INTO gameday_$this_name VALUES ( [setup_values [::csv::join $line ","]] );"
}
}
}
}
catch {$doc delete}
}
}
}
}
proc readfile { file } {
if {[catch {open $file "r"} fp]} {
# puts "Error reading $file, $fp"
return -1
}
set data [read $fp]
close $fp
return $data
}
proc go_get_it { lg daydir urlList inifile } {
set db [get_dbh $inifile]
foreach url $urlList {
set gamedayid [file tail $url]
set rows [mysqlsel $db "select * from gameday_game_info where gameid = \'$gamedayid\'" -list]
if { $rows > 0 } {
puts "Already have data for $gamedayid"
continue
}
set file [file join $url boxscore.xml]
set data [readfile $file]
if {$data == "-1" } {
puts "boxscore data not found for [file tail $url]"
} else {
set boxscore $data
set file [file join $url game.xml]
set data [readfile $file]
if { $data == "-1" } {
puts "game.xml data not found for [file tail $url]"
set data ""
}
get_gameday_game_info $db $gamedayid $lg $boxscore $data
get_boxscore_data $db $boxscore $gamedayid
}
set file [file join $url players.xml]
set data [readfile $file]
if {$data == "-1" } {
puts "player data not found for [file tail $url]"
} else {
get_player_data $db $data $url $gamedayid
}
set file [file join $url inning inning_hit.xml]
set data [readfile $file]
if {$data == "-1" } {
puts "inning hit data not found for [file tail $url]"
} else {
get_hitting_data $db $data $gamedayid
}
set file [file join $url inning inning_1.xml]
if {$data == "-1" } {
puts "No file for inning 1 on [file tail $url] ?! Skipping pbp data"
} else {
pbp_data $db $url $gamedayid
}
if {[catch {glob [file join $url batters]} errMsg]} {
puts "No batters directory"
} else {
get_players_data $db $gamedayid $url
}
}
}
proc sleep { N } {
after [expr {int($N*1000)}]
}
proc createdir { dir } {
if {[file exists $dir]} {
if {![file isdirectory $dir] } {
puts "$dir already exists, but is not a directory!"
exit 1
}
} else {
if {[catch {file mkdir $dir} errmsg]} {
puts "Error creating $dir: $errmsg"
exit 1
}
}
}
proc writefile { url dir file } {
createdir $dir
set token [http::geturl $url]
if [http_data_ok_p $token] {
set filename [file join $dir $file]
if {[file exists $filename]} {
# puts "$file exists"
return 0
}
if {[catch {open $filename "w"} fp]} {
puts "Error creating $dir: $fp"
exit 1
}
set data [http::data $token]
puts $fp $data
close $fp
}
catch {http::cleanup $token}
}
proc http_data_ok_p { token } {
switch -glob [http::ncode $token] {
404 { set found_p 0 }
default { set found_p 1 }
}
return $found_p
}
proc pullOutTheURLs {html} {
# Parse your HTML document into a DOM tree structur
set doc [dom parse -html $html]
# root will be the root element of your HTML document,
# ie. the HTML element
set root [$doc documentElement]
# The following finds all anchor links . It isn't clear to me,
# if you also interested in the urls of , and
# elements.
set nodeList ""
catch {set nodeList [$root selectNodes {descendant::a}]}
if {$nodeList == "" } {
return {}
}
# init the result list
set urlList {}
# Pull out the Values of the href attributes
foreach node $nodeList {
set attList [$node attributes *]
foreach attribute $attList {
if {[string tolower $attribute] == "href"} {
set url [$node getAttribute $attribute]
#if {[string range $url 0 3] == "gid_"} {
# lappend urlList [$node getAttribute $attribute]
# break
#}
#puts $url
lappend urlList [$node getAttribute $attribute]
}
}
}
# Get rid of the DOM representation of your HTML document
$doc delete
# finished
return $urlList
}
proc drill_down { base level url } {
set token [http::geturl $url]
if {[http_data_ok_p $token]} {
set urls [pullOutTheURLs [http::data $token]]
foreach u $urls {
if { [string index $u 0] == "/" } { continue }
if { [string index $u end] == "/"} {
set l "$level/[regsub -all {\/} $u ""]"
drill_down $base $l "${url}${u}"
} else {
writefile ${url}${u} [file join $base $level] $u
# puts "[file tail [string toupper $level]]: $u"
}
# sleep 1
}
} else {
puts "Could not get $url"
}
catch {http::cleanup $token}
}
proc get_files { base_url base_dir year month day } {
set dayurl "$base_url/$year/$month/$day/"
set basedir [file join $base_dir $year $month $day]
createdir $basedir
set urlList {}
set token [http::geturl $dayurl]
if {[http_data_ok_p $token]} {
catch {set urlList [pullOutTheURLs [http::data $token]]}
catch {http::cleanup $token}
foreach url $urlList {
set gamedayid [regsub -all {\/} $url ""]
# puts $url
if {[string range $url 0 3] == "gid_"} {
puts "Working on $gamedayid"
# puts "${dayurl}${url}"
drill_down $basedir $gamedayid "${dayurl}${url}"
}
}
} else {
puts "Problem with $dayurl"
}
}
puts "Done loading dortmunder, pint = 0"