Pitchf/x Data Download

A PITCHf/x primer, posted by Mike Fast under Uncategorized [6]. MLBAM used the PITCHf/x data in their Enhanced Gameday application and also made the data freely available for downloading and research. If you want to manipulate and analyze a single game’s worth of data, you can download the XML files and import them into Microsoft Excel. My Profile Settings Download the App Sign Out. Xx MB of 10 GB used Scott Lindholm - Profile Favorite Download Workbook. 2018 PITCHf/x Data 349 views | Scott Lindholm. Baseball Savant PITCHf/x Data: Pct of called balls in strike zone (BiZ%) plotted against called strikes outside strike zone (SoZ%). Reference lines are medians.

  1. Pitchf X System
  2. Pitch X Data Download Software
Yahoo data download
Download pitchfx XML data (haven't tested in years).
DownloadPitchFX.R

Pitchf X System

Pitch x data download free
# DownloadPitchFX.R
# downloads the massive MLB Gameday data.
# Version 0.4
# Version History
# 0.5 ~ grab player data, both pitchers and batters, ability to pick team
# 0.4 ~ get team data, and ability to grab team info, checks to see if regular season
# 0.3 ~ updated so 2010 works, fixed some bugs, and saves as tab delimited file
# 0.2 ~ inputs are start and end dates
# 0.1 ~ grab Pitch f/x data from MLB Gameday, specify date ranges (takes half a minute for a day's worth of data on my 2.5Ghz machine)
# Future Versions:
# ~ ability to pick pitchers, batters, teams
# - ability to grab matchups
# - better searching instead of tediously parsing through each XML file
# ~ connect to mysql database
# ~ don't overheat computer!
# ~ document Gameday Code
# downloading pitch f/x data from MLB website
# Get data from http://gd2.mlb.com/components/game/mlb/
# XML package http://www.omegahat.org/RSXML/shortIntro.html
# Perl script of same application by Mike Fast:
# http://fastballs.files.wordpress.com/2007/09/hack_28_parser_mikefast_test_pl.txt
# Less general R code from Erik Iverson of Blogistic Reflections:
# http://blogisticreflections.wordpress.com/2009/10/04/using-r-to-analyze-baseball-games-in-real-time/
# listing of pitch f/x tools by Baseball Analysts
# http://baseballanalysts.com/archives/2010/03/how_can_i_get_m.php
# downloadable pitch f/x database from Darrell Zimmerman
# http://www.wantlinux.net/category/baseball-data/
# I think gameday data starts 2005
# I think enhanced gameday (pitch fx) has all of 2009, most of 2008, some 2007, tiny bit 2006
# required libraries:
library(XML)
# code for <game type> in game.xml (input game.type in code)
# 'S' ~ spring training, 'R' ~ regular season, 'D' ~ Division Series
# 'L' ~ League Championship Series 'W' ~ World Series
# code for <game gameday_sw> in game.xml
# http://sports.dir.groups.yahoo.com/group/RetroSQL/message/320
# 'N' ~ missing, no pitch info
# 'Y' ~ standard w/ pitch locations
# 'E' ~ w/ pitch f/x
# 'P' ~ for 2010, whatever that's supposed to mean
# code for teams
# code for players
# code for gameday
# code for pitch type
# code for atbat type
# checks for:
# gameday type
# home, away
# player, batter, pitch type
# -----------------------------------------------------------
# DownloadPitchFX: walk the MLB Gameday directory tree for a range of dates
# and write the pitch-level data to a tab-delimited text file, one row per
# pitch (game date, inning, home/away team, at-bat attributes, pitch
# attributes).
#
# Arguments:
#   fileloc     path of the output file; it is overwritten and starts with a
#               header row
#   start.date  first day to download, a Date or a 'YYYY-MM-DD' string
#   end.date    last day to download (inclusive); defaults to start.date
#   URL.base    root URL of the Gameday directory tree
#               NOTE(review): the header says this script has not been tested
#               in years - confirm the default host still serves these files
#   game.type   game type code(s) to keep, compared with the 'type' attribute
#               of <game> in game.xml ('R' regular season, 'S' spring, ...)
#   grab.pitch  names of the <pitch> attributes to record
#   grab.atbat  names of the <atbat> attributes to record
#
# Only games whose 'gameday_sw' attribute is 'E' or 'P' (pitch f/x available)
# and that list more than one inning file are downloaded.
#
# Returns NULL invisibly; the function is called for its effect on fileloc.
# Requires the XML package (htmlParse, xmlInternalTreeParse, xpathSApply, ...).
DownloadPitchFX <- function(fileloc = "./pitchfx.txt",
                            start.date = "2009-05-02", end.date = start.date,
                            URL.base = "http://gd2.mlb.com/components/game/mlb/",
                            game.type = "R",
                            grab.pitch = c("des", "type", "x", "y",
                                           "start_speed", "end_speed",
                                           "sz_top", "sz_bot", "pfx_x", "pfx_z",
                                           "px", "pz", "x0", "y0", "z0",
                                           "vx0", "vy0", "vz0", "ax", "ay", "az",
                                           "break_y", "break_angle",
                                           "break_length", "pitch_type",
                                           "type_confidence"),
                            grab.atbat = c("b", "s", "o", "batter", "pitcher",
                                           "b_height", "stand", "p_throws",
                                           "event")) {
  # href of every link found on an (HTML) directory-listing page
  list.links <- function(url) {
    hrefs <- xpathSApply(htmlParse(url), "//a[@*]", xmlGetAttr, "href")
    as.character(unlist(hrefs))
  }

  # first value of a <game> attribute, or NA when the attribute is absent,
  # so that the comparisons below never see a zero-length condition
  game.attr <- function(doc, attr) {
    value <- unlist(xpathSApply(doc, "//game[@*]", xmlGetAttr, attr))
    if (length(value) > 0) as.character(value[1]) else NA_character_
  }

  # work on calendar dates: time-based differences pick their own units and
  # are thrown off by 23/25-hour daylight-saving days
  first.day <- as.Date(start.date)
  last.day <- as.Date(end.date)
  if (length(first.day) != 1 || length(last.day) != 1 ||
      is.na(first.day) || is.na(last.day) || last.day < first.day) {
    stop("start.date and end.date must be single valid dates with ",
         "end.date >= start.date", call. = FALSE)
  }
  days <- seq(first.day, last.day, by = "day")

  # write the header row (this truncates any existing file)
  meta <- c("Year", "Month", "Day", "Inning", "Home", "Away")
  n.columns <- length(meta) + length(grab.atbat) + length(grab.pitch)
  write(c(meta, grab.atbat, grab.pitch), file = fileloc,
        ncolumns = n.columns, sep = "\t")

  for (i in seq_along(days)) {
    year <- as.integer(format(days[i], "%Y"))
    month <- as.integer(format(days[i], "%m"))
    day <- as.integer(format(days[i], "%d"))
    # e.g. <URL.base>year_2009/month_05/day_02/
    URL.date <- paste0(URL.base, format(days[i], "year_%Y/month_%m/day_%d/"))

    # each game of the day lives in a sub-directory whose name starts with gid
    # (the day's miniscoreboard.xml lists the same games via 'gameday_link')
    game.dirs <- grep("^gid_*", list.links(URL.date), value = TRUE)

    for (game in seq_along(game.dirs)) {
      print(game)
      URL.game <- paste0(URL.date, game.dirs[game])

      # skip games that have no game.xml
      if (!("game.xml" %in% list.links(URL.game))) next

      # game type (regular season, etc.) and gameday type (pitch f/x, etc.)
      XML.game <- xmlInternalTreeParse(paste0(URL.game, "game.xml"))
      if (!(game.attr(XML.game, "type") %in% game.type)) next
      if (!(game.attr(XML.game, "gameday_sw") %in% c("E", "P"))) next

      # team abbreviations, in document order: first <team> is recorded as
      # Home, second as Away
      teams <- as.character(xpathSApply(XML.game, "//team[@*]",
                                        xmlGetAttr, "abbrev"))
      home <- teams[1]
      away <- teams[2]

      # numbered inning files; a game with at most one is treated as having
      # no usable data
      inning.files <- grep("^inning_[0-9]",
                           list.links(paste0(URL.game, "inning/")),
                           value = TRUE)
      if (length(inning.files) <= 1) next

      for (inning in seq_along(inning.files)) {
        URL.inning <- paste0(URL.game, "inning/", "inning_", inning, ".xml")
        XML.inning <- xmlInternalTreeParse(URL.inning)

        atbats <- xpathSApply(XML.inning, "//atbat[@*]")
        if (length(atbats) == 0) next

        # number of <pitch> children of each at-bat
        n.pitches <- vapply(atbats, function(ab) {
          sum(names(xmlChildren(ab)) == "pitch")
        }, integer(1))
        if (sum(n.pitches) == 0) next

        print(paste(game.dirs[game], "inning =", inning))

        # one row per pitch, one column per requested pitch attribute;
        # cbind() keeps this a matrix even when there is a single pitch
        pitch.attrs <- do.call(cbind, lapply(grab.pitch, function(a) {
          as.character(xpathSApply(XML.inning, "//pitch[@*]", xmlGetAttr, a))
        }))

        # one row per at-bat, then repeat each row once per pitch of that
        # at-bat so it lines up with pitch.attrs
        atbat.attrs <- do.call(rbind, lapply(atbats, function(ab) {
          as.character(xmlAttrs(ab)[grab.atbat])
        }))
        atbat.attrs <- atbat.attrs[rep(seq_len(nrow(atbat.attrs)),
                                       times = n.pitches), , drop = FALSE]

        # append the rows; write() emits column-major, hence the transpose
        rows <- cbind(year, month, day, inning, home, away,
                      atbat.attrs, pitch.attrs)
        write(t(rows), file = fileloc, ncolumns = n.columns,
              append = TRUE, sep = "\t")
      }
    }
  }

  invisible(NULL)
}

Pitch X Data Download Software

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment