A PITCHf/x primer Posted by Mike Fast under Uncategorized [6]. MLBAM used the PITCHf/x data in their Enhanced Gameday application and also made the data freely available for downloading and research. If you want to manipulate and analyze a single game’s worth of data, you can download and import the XML files into a Microsoft Excel. My Profile Settings Download the App Sign Out. Xx MB of 10 GB used Scott Lindholm - Profile Favorite Download Workbook. 2018 PITCHf/x Data 349 views| Scott Lindholm. Baseball Savant PITCHf/x Data Pct of called balls in strike zone (BiZ%) plotted against called strikes outside strike zone (SoZ%) Reference lines are medians.

Download pitchfx XML data (haven't tested in years).
DownloadPitchFX.R
Pitchf X System

| # DownloadPitchFX.R |
| # downloads the massive MLB Gameday data. |
| # Version 0.4 |
| # Version History |
| # 0.5 ~ grab player data, both pitchers and batters, ability to pick team |
| # 0.4 ~ get team data, and ability to grab team info, checks to see if regular season |
| # 0.3 ~ updated so 2010 works, fixed some bugs, and saves as tab delimited file |
| # 0.2 ~ inputs are start and end dates |
| # 0.1 ~ grab Pitch f/x data from MLB Gameday, specify date ranges (takes half a minute for a day's worth of data on my 2.5Ghz machine) |
| # Future Versions: |
| # ~ ability to pick pitchers, batters, teams |
| # - ability to grab matchups |
| # - better searching instead of tediously parsing through each XML file |
| # ~ connect to mysql database |
| # ~ don't overheat computer! |
| # ~ document Gameday Code |
| # downloading pitch f/x data from MLB website |
| # Get data from http://gd2.mlb.com/components/game/mlb/ |
| # XML package http://www.omegahat.org/RSXML/shortIntro.html |
| # Perl script of same application by Mike Fast: |
| # http://fastballs.files.wordpress.com/2007/09/hack_28_parser_mikefast_test_pl.txt |
| # Less general R code from Erik Iverson of Blogistic Reflections: |
| # http://blogisticreflections.wordpress.com/2009/10/04/using-r-to-analyze-baseball-games-in-real-time/ |
| # listing of pitch f/x tools by Baseball Analysts |
| # http://baseballanalysts.com/archives/2010/03/how_can_i_get_m.php |
| # downloadable pitch f/x database from Darrell Zimmerman |
| # http://www.wantlinux.net/category/baseball-data/ |
| # I think gameday data starts 2005 |
| # I think enhanced gameday (pitch fx) has all of 2009, most of 2008, some 2007, tiny bit 2006 |
| # required libraries: |
| library(XML) |
| # code for <game type> in game.xml (input game.type in code) |
| # 'S' ~ spring training, 'R' ~ regular season, 'D' ~ Division Series |
| # 'L' ~ League Championship Series 'W' ~ World Series |
| # code for <game gameday_sw> in game.xml |
| # http://sports.dir.groups.yahoo.com/group/RetroSQL/message/320 |
| # 'N' ~ missing, no pitch info |
| # 'Y' ~ standard w/ pitch locations |
| # 'E' ~ w/ pitch f/x |
| # 'P' ~ for 2010, whatever that's supposed to mean |
| # code for teams |
| # code for players |
| # code for gameday |
| # code for pitch type |
| # code for atbat type |
| # checks for: |
| # gameday type |
| # home, away |
| # player, batter, pitch type |
| # ----------------------------------------------------------- |
| DownloadPitchFX<-function(fileloc='./pitchfx.txt', |
| start.date='2009-05-02', end.date=start.date, |
| URL.base='http://gd2.mlb.com/components/game/mlb/', |
| game.type='R', |
| grab.pitch= c('des', 'type', 'x', 'y', |
| 'start_speed', 'end_speed', |
| 'sz_top', 'sz_bot', 'pfx_x', 'pfx_z', 'px', 'pz', |
| 'x0', 'y0', 'z0', 'vx0', 'vy0', 'vz0', 'ax', 'ay', 'az', |
| 'break_y', 'break_angle', 'break_length', 'pitch_type', |
| 'type_confidence'), |
| grab.atbat= c('b', 's', 'o', 'batter', 'pitcher', 'b_height', |
| 'stand', 'p_throws', 'event')) { |
| # write initial variables on file |
| meta<- c('Year', 'Month', 'Day', 'Inning', 'Home', 'Away') |
| write(c(meta, grab.atbat, grab.pitch), file=fileloc, |
| ncol= length(c(grab.atbat, grab.pitch)) + length(meta), sep='t') |
| # transfer date info |
| start.date<- as.POSIXlt(start.date); end.date<- as.POSIXlt(end.date); |
| diff.date<- as.numeric(difftime(end.date, start.date)) |
| date.range<- as.POSIXlt(seq(start.date, by='days', |
| length=1+diff.date)) |
| for (iin1:(diff.date+1)) { |
| year<-date.range[i]$year+1900 |
| month<-date.range[i]$mon+1 |
| day<-date.range[i]$mday |
| URL.date<- paste(URL.base, 'year_', year, '/', |
| ifelse(month>=10, 'month_', 'month_0'), month, '/', |
| ifelse(day>=10, 'day_', 'day_0'), day, '/', sep='') |
| # grab matchups for today |
| ## URL.scoreboard <- paste(URL.date, 'miniscoreboard.xml', sep = ') |
| ## XML.scoreboard <- xmlInternalTreeParse(URL.scoreboard) |
| ## parse.scoreboard <- xpathSApply(XML.scoreboard, '//game[@gameday_link]', |
| ## xmlGetAttr, 'gameday_link') |
| HTML.day<- htmlParse(URL.date) |
| parse.day<- xpathSApply(HTML.day, '//a[@*]', xmlGetAttr, 'href') |
| parse.day<-parse.day[grep('^gid_*', parse.day)] |
| # if games exists today |
| if (length(parse.day) >=1) { |
| # for each game |
| for (gamein1:length(parse.day)) { |
| print(game) |
| URL.game<- paste(URL.date, parse.day[game], sep='') |
| HTML.game<- htmlParse(URL.game) |
| parse.game.exists<- xpathSApply(HTML.game, '//a[@*]', xmlGetAttr, 'href') |
| # if game.xml exists |
| if (sum(match(parse.game.exists, 'game.xml'), na.rm=T) >0) { |
| # grab game type (regular season, etc.) and gameday type (pitch f/x, etc.) |
| XML.game<- xmlInternalTreeParse(paste(URL.game, 'game.xml', sep='')) |
| parse.game<- sapply(c('type', 'gameday_sw'), function (x) |
| xpathSApply(XML.game, '//game[@*]', xmlGetAttr, x)) |
| # if proper game type: 'R' ~ regular season, 'S' ~ spring, 'D' ~ divison series |
| # 'L' ~ league chamption series, 'W' ~ world series |
| if (parse.game['type'] game.type) { |
| # grab team names |
| parse.teams<- sapply(c('abbrev'), function (x) |
| xpathSApply(XML.game, '//team[@*]', xmlGetAttr, x)) |
| home<-parse.teams[1]; away<-parse.teams[2] |
| # if pitch f/x data exists |
| if (parse.game['gameday_sw'] 'E'|parse.game['gameday_sw'] 'P') { |
| # grab number of innings played |
| HTML.Ninnings<- htmlParse(paste(URL.game, 'inning/', sep='')) |
| parse.Ninnings<- xpathSApply(HTML.Ninnings, '//a[@*]', xmlGetAttr, 'href') |
| # check to see if game exists data by checking innings > 1 |
| if (length(grep('^inning_[0-9]', parse.Ninnings)) >1) { |
| # for each inning |
| for (inningin1:length(grep('^inning_[0-9]', parse.Ninnings))) { |
| # grab inning info |
| URL.inning<- paste(URL.game, 'inning/', 'inning_', inning, |
| '.xml', sep='') |
| XML.inning<- xmlInternalTreeParse(URL.inning) |
| parse.atbat<- xpathSApply(XML.inning, '//atbat[@*]') |
| parse.Npitches.atbat<- sapply(parse.atbat, function(x) |
| sum(names(xmlChildren(x)) 'pitch')) |
| # check to see if atbat exists |
| if (length(parse.atbat) >0) { |
| print(paste(parse.day[game], 'inning =', inning)) |
| # parse attributes from pitch and atbat (ugh, ugly) |
| parse.pitch<- sapply(grab.pitch, function(x) |
| as.character(xpathSApply(XML.inning, '//pitch[@*]', |
| xmlGetAttr, x))) |
| parse.pitch<-if (class(parse.pitch) 'character') { |
| t(parse.pitch) |
| } else apply(parse.pitch, 2, as.character) |
| results.atbat<- t(sapply(parse.atbat, function(x) |
| xmlAttrs(x)[grab.atbat])) |
| results.atbat<-results.atbat[rep(seq(nrow(results.atbat)), |
| times=parse.Npitches.atbat),] |
| results.atbat<-if (class(results.atbat) 'character') { |
| t(results.atbat) |
| } elseresults.atbat |
| ## parse.pitch <- sapply(grab.pitch, function(x) |
| ## xpathSApply(XML.inning, '//pitch[@*]', |
| ## xmlGetAttr, x)) |
| ## parse.pitch <- apply(parse.pitch, 2, as.character) |
| ## results.atbat <- t(sapply(parse.atbat, function(x) |
| ## xmlAttrs(x)[grab.atbat])) |
| ## results.atbat <- results.atbat[rep(seq(nrow(results.atbat)), |
| ## times = parse.Npitches.atbat),] |
| # write results |
| write(t(cbind(year, month, day, inning, home, away, |
| results.atbat, parse.pitch)), file=fileloc, |
| ncol= length(c(grab.atbat, grab.pitch)) + length(meta), |
| append=T, sep='t') |
| } |
| } |
| } |
| } |
| } |
| } |
| } |
| } |
| } |
| } |
Pitch X Data Download Software
Sign up for freeto join this conversation on GitHub. Already have an account? Sign in to comment