GSP-374 : Predict Soccer Match Outcomes with BigQuery ML

GSP-374 : Predict Soccer Match Outcomes with BigQuery ML

Overview

Create all the necessary table:

spls/bq-soccer-analytics/competitions.json -> competitions

spls/bq-soccer-analytics/matches.json -> matches

spls/bq-soccer-analytics/teams.json -> teams

spls/bq-soccer-analytics/players.json -> players

spls/bq-soccer-analytics/events.json -> events

These tables need to be created in this lab.

Task - 1: Data ingestion

Create a table as mentioned in the lab instructions page.

Task - 2: Analyze soccer data

 1SELECT
 2playerId,
 3(Players.firstName || ' ' || Players.lastName) AS playerName,
 4COUNT(id) AS numPKAtt,
 5SUM(IF(101 IN UNNEST(tags.id), 1, 0)) AS numPKGoals,
 6SAFE_DIVIDE(
 7SUM(IF(101 IN UNNEST(tags.id), 1, 0)),
 8COUNT(id)
 9) AS PKSuccessRate
10FROM
11`soccer.<enter table name given in the lab>` Events
12LEFT JOIN
13`soccer.players` Players ON
14Events.playerId = Players.wyId
15WHERE
16eventName = 'Free Kick' AND
17subEventName = 'Penalty'
18GROUP BY
19playerId, playerName
20HAVING
21numPkAtt >= 5
22ORDER BY
23PKSuccessRate DESC, numPKAtt DESC

Task - 3: Gain insight by analyzing soccer data

 1WITH
 2Shots AS
 3(
 4SELECT
 5*,
 6/* 101 is known Tag for 'goals' from goals table */
 7(101 IN UNNEST(tags.id)) AS isGoal,
 8/* Translate 0-100 (x,y) coordinate-based distances to absolute positions
 9using "average" field dimensions of 105x68 before combining in 2D dist calc */
10SQRT(
11POW(
12  (100 - positions[ORDINAL(1)].x) * 120/100,      //Change values according to the lab
13  2) +
14POW(
15  (60 - positions[ORDINAL(1)].y) * 69/100,        //Change values according to the lab
16  2)
17) AS shotDistance
18FROM
19`soccer.<enter table name given in the lab>`
20WHERE
21/* Includes both "open play" & free kick shots (including penalties) */
22eventName = 'Shot' OR
23(eventName = 'Free Kick' AND subEventName IN ('Free kick shot', 'Penalty'))
24)
25SELECT
26ROUND(shotDistance, 0) AS ShotDistRound0,
27COUNT(*) AS numShots,
28SUM(IF(isGoal, 1, 0)) AS numGoals,
29AVG(IF(isGoal, 1, 0)) AS goalPct
30FROM
31Shots
32WHERE
33shotDistance <= 50
34GROUP BY
35ShotDistRound0
36ORDER BY
37ShotDistRound0

Task - 4: Create a regression model using soccer data

 1CREATE MODEL `soccer.xg_logistic_reg_model_238`                        //Change values according to the lab
 2OPTIONS(
 3model_type = 'LOGISTIC_REG',
 4input_label_cols = ['isGoal']
 5) AS
 6SELECT
 7Events.subEventName AS shotType,
 8/* 101 is known Tag for 'goals' from goals table */
 9(101 IN UNNEST(Events.tags.id)) AS isGoal,
10`soccer.GetShotDistanceToGoal238`(Events.positions[ORDINAL(1)].x,       //Change values according to the lab
11Events.positions[ORDINAL(1)].y) AS shotDistance,
12`soccer.GetShotAngleToGoal238`(Events.positions[ORDINAL(1)].x,          //Change values according to the lab
13Events.positions[ORDINAL(1)].y) AS shotAngle
14FROM
15`soccer.<enter table name given in the lab>` Events
16LEFT JOIN
17`soccer.matches` Matches ON
18Events.matchId = Matches.wyId
19LEFT JOIN
20`soccer.competitions` Competitions ON
21Matches.competitionId = Competitions.wyId
22WHERE
23/* Filter out World Cup matches for model fitting purposes */
24Competitions.name != 'World Cup' AND
25/* Includes both "open play" & free kick shots (including penalties) */
26(
27eventName = 'Shot' OR
28(eventName = 'Free Kick' AND subEventName IN ('Free kick shot', 'Penalty'))
29)
30;

Task - 5: Make predictions from new data with the BigQuery model

 1SELECT
 2predicted_isGoal_probs[ORDINAL(1)].prob AS predictedGoalProb,
 3* EXCEPT (predicted_isGoal, predicted_isGoal_probs),
 4FROM
 5ML.PREDICT(
 6MODEL `soccer.xg_logistic_reg_model_238`,                                   //Change values according to the lab
 7(
 8SELECT
 9   Events.playerId,
10   (Players.firstName || ' ' || Players.lastName) AS playerName,
11   Teams.name AS teamName,
12   CAST(Matches.dateutc AS DATE) AS matchDate,
13   Matches.label AS match,
14/* Convert match period and event seconds to minute of match */
15   CAST((CASE
16     WHEN Events.matchPeriod = '1H' THEN 0
17     WHEN Events.matchPeriod = '2H' THEN 45
18     WHEN Events.matchPeriod = 'E1' THEN 90
19     WHEN Events.matchPeriod = 'E2' THEN 105
20     ELSE 120
21     END) +
22     CEILING(Events.eventSec / 60) AS INT64)
23     AS matchMinute,
24   Events.subEventName AS shotType,
25   /* 101 is known Tag for 'goals' from goals table */
26   (101 IN UNNEST(Events.tags.id)) AS isGoal,
27
28   `soccer.GetShotDistanceToGoal238`(Events.positions[ORDINAL(1)].x,          //Change values according to the lab
29       Events.positions[ORDINAL(1)].y) AS shotDistance,
30   `soccer.GetShotAngleToGoal238`(Events.positions[ORDINAL(1)].x,             //Change values according to the lab
31       Events.positions[ORDINAL(1)].y) AS shotAngle
32FROM
33   `soccer.<enter table name given in the lab>` Events
34LEFT JOIN
35   `soccer.matches` Matches ON
36       Events.matchId = Matches.wyId
37LEFT JOIN
38   `soccer.competitions` Competitions ON
39       Matches.competitionId = Competitions.wyId
40LEFT JOIN
41   `soccer.players` Players ON
42       Events.playerId = Players.wyId
43LEFT JOIN
44   `soccer.teams` Teams ON
45       Events.teamId = Teams.wyId
46WHERE
47   /* Look only at World Cup matches to apply model */
48   Competitions.name = 'World Cup' AND
49   /* Includes both "open play" & free kick shots (but not penalties) */
50   (
51     eventName = 'Shot' OR
52     (eventName = 'Free Kick' AND subEventName IN ('Free kick shot'))
53   ) AND
54   /* Filter only to goals scored */
55   (101 IN UNNEST(Events.tags.id))
56)
57)
58ORDER BY
59predictedgoalProb

Congratulations, you're all done with the lab 😄