Update app.py
Browse files
app.py
CHANGED
|
@@ -15,7 +15,7 @@ import numpy as np
|
|
| 15 |
import pandas as pd
|
| 16 |
import streamlit as st
|
| 17 |
|
| 18 |
-
from txtai
|
| 19 |
|
| 20 |
|
| 21 |
class Stats:
|
|
@@ -125,7 +125,7 @@ class Stats:
|
|
| 125 |
|
| 126 |
embeddings = Embeddings(
|
| 127 |
{
|
| 128 |
-
"transform":
|
| 129 |
}
|
| 130 |
)
|
| 131 |
|
|
@@ -253,10 +253,10 @@ class Batting(Stats):
|
|
| 253 |
]
|
| 254 |
|
| 255 |
def load(self):
|
| 256 |
-
# Retrieve raw data
|
| 257 |
-
players = pd.read_csv("https://
|
| 258 |
-
batting = pd.read_csv("https://
|
| 259 |
-
fielding = pd.read_csv("https://
|
| 260 |
|
| 261 |
# Merge player data in
|
| 262 |
batting = pd.merge(players, batting, how="inner", on=["playerID"])
|
|
@@ -387,9 +387,9 @@ class Pitching(Stats):
|
|
| 387 |
]
|
| 388 |
|
| 389 |
def load(self):
|
| 390 |
-
# Retrieve raw data
|
| 391 |
-
players = pd.read_csv("https://
|
| 392 |
-
pitching = pd.read_csv("https://
|
| 393 |
|
| 394 |
# Merge player data in
|
| 395 |
pitching = pd.merge(players, pitching, how="inner", on=["playerID"])
|
|
@@ -439,9 +439,9 @@ class Application:
|
|
| 439 |
st.markdown(
|
| 440 |
"""
|
| 441 |
This application finds the best matching historical players using vector search with [txtai](https://github.com/neuml/txtai).
|
| 442 |
-
Raw data is from the [Baseball Databank](https://github.com/chadwickbureau
|
| 443 |
article](https://medium.com/neuml/explore-baseball-history-with-vector-search-5778d98d6846) for more details.
|
| 444 |
-
|
| 445 |
)
|
| 446 |
|
| 447 |
player, search = st.tabs(["Player", "Search"])
|
|
@@ -488,7 +488,8 @@ class Application:
|
|
| 488 |
self.table(results, ["link", "nameFirst", "nameLast", "teamID"] + stats.columns[1:])
|
| 489 |
|
| 490 |
# Save parameters
|
| 491 |
-
|
|
|
|
| 492 |
|
| 493 |
def search(self):
|
| 494 |
"""
|
|
@@ -497,7 +498,7 @@ class Application:
|
|
| 497 |
|
| 498 |
st.markdown("Find players with similar statistics.")
|
| 499 |
|
| 500 |
-
category = self.category("Batting", "searchcategory")
|
| 501 |
with st.form("search"):
|
| 502 |
if category == "Batting":
|
| 503 |
stats, columns = self.batting, self.batting.columns[:-6]
|
|
@@ -524,8 +525,7 @@ class Application:
|
|
| 524 |
"""
|
| 525 |
|
| 526 |
# Get parameters
|
| 527 |
-
params = st.
|
| 528 |
-
params = {x: params[x][0] for x in params}
|
| 529 |
|
| 530 |
# Sync parameters with session state
|
| 531 |
if all(x in st.session_state for x in ["category", "name", "year"]):
|
|
@@ -627,7 +627,7 @@ class Application:
|
|
| 627 |
chart = (chart + rule).encode(y=alt.Y(title=metric)).properties(height=200).configure_axis(grid=False)
|
| 628 |
|
| 629 |
# Draw chart
|
| 630 |
-
st.altair_chart(chart + rule, theme="streamlit",
|
| 631 |
|
| 632 |
def table(self, results, columns):
|
| 633 |
"""
|
|
@@ -643,7 +643,7 @@ class Application:
|
|
| 643 |
results,
|
| 644 |
column_order=columns,
|
| 645 |
column_config={
|
| 646 |
-
"link": st.column_config.LinkColumn("Link", width="small"),
|
| 647 |
"yearID": st.column_config.NumberColumn("Year", format="%d"),
|
| 648 |
"nameFirst": "First",
|
| 649 |
"nameLast": "Last",
|
|
|
|
| 15 |
import pandas as pd
|
| 16 |
import streamlit as st
|
| 17 |
|
| 18 |
+
from txtai import Embeddings
|
| 19 |
|
| 20 |
|
| 21 |
class Stats:
|
|
|
|
| 125 |
|
| 126 |
embeddings = Embeddings(
|
| 127 |
{
|
| 128 |
+
"transform": Stats.transform
|
| 129 |
}
|
| 130 |
)
|
| 131 |
|
|
|
|
| 253 |
]
|
| 254 |
|
| 255 |
def load(self):
|
| 256 |
+
# Retrieve raw data
|
| 257 |
+
players = pd.read_csv("https://hf.co/datasets/neuml/baseballdatabank/resolve/main/People.csv")
|
| 258 |
+
batting = pd.read_csv("https://hf.co/datasets/neuml/baseballdatabank/resolve/main/Batting.csv")
|
| 259 |
+
fielding = pd.read_csv("https://hf.co/datasets/neuml/baseballdatabank/resolve/main/Fielding.csv")
|
| 260 |
|
| 261 |
# Merge player data in
|
| 262 |
batting = pd.merge(players, batting, how="inner", on=["playerID"])
|
|
|
|
| 387 |
]
|
| 388 |
|
| 389 |
def load(self):
|
| 390 |
+
# Retrieve raw data
|
| 391 |
+
players = pd.read_csv("https://hf.co/datasets/neuml/baseballdatabank/resolve/main/People.csv")
|
| 392 |
+
pitching = pd.read_csv("https://hf.co/datasets/neuml/baseballdatabank/resolve/main/Pitching.csv")
|
| 393 |
|
| 394 |
# Merge player data in
|
| 395 |
pitching = pd.merge(players, pitching, how="inner", on=["playerID"])
|
|
|
|
| 439 |
st.markdown(
|
| 440 |
"""
|
| 441 |
This application finds the best matching historical players using vector search with [txtai](https://github.com/neuml/txtai).
|
| 442 |
+
Raw data is from the [Baseball Databank](https://github.com/chadwickbureau) GitHub project. Read [this
|
| 443 |
article](https://medium.com/neuml/explore-baseball-history-with-vector-search-5778d98d6846) for more details.
|
| 444 |
+
"""
|
| 445 |
)
|
| 446 |
|
| 447 |
player, search = st.tabs(["Player", "Search"])
|
|
|
|
| 488 |
self.table(results, ["link", "nameFirst", "nameLast", "teamID"] + stats.columns[1:])
|
| 489 |
|
| 490 |
# Save parameters
|
| 491 |
+
for name, value in [("category", category), ("name", name), ("year", year)]:
|
| 492 |
+
st.query_params[name] = value
|
| 493 |
|
| 494 |
def search(self):
|
| 495 |
"""
|
|
|
|
| 498 |
|
| 499 |
st.markdown("Find players with similar statistics.")
|
| 500 |
|
| 501 |
+
stats, category = None, self.category("Batting", "searchcategory")
|
| 502 |
with st.form("search"):
|
| 503 |
if category == "Batting":
|
| 504 |
stats, columns = self.batting, self.batting.columns[:-6]
|
|
|
|
| 525 |
"""
|
| 526 |
|
| 527 |
# Get parameters
|
| 528 |
+
params = {x: st.query_params.get(x) for x in ["category", "name", "year"]}
|
|
|
|
| 529 |
|
| 530 |
# Sync parameters with session state
|
| 531 |
if all(x in st.session_state for x in ["category", "name", "year"]):
|
|
|
|
| 627 |
chart = (chart + rule).encode(y=alt.Y(title=metric)).properties(height=200).configure_axis(grid=False)
|
| 628 |
|
| 629 |
# Draw chart
|
| 630 |
+
st.altair_chart(chart + rule, theme="streamlit", width="stretch")
|
| 631 |
|
| 632 |
def table(self, results, columns):
|
| 633 |
"""
|
|
|
|
| 643 |
results,
|
| 644 |
column_order=columns,
|
| 645 |
column_config={
|
| 646 |
+
"link": st.column_config.LinkColumn("Link", width="small", display_text=":material/open_in_new:"),
|
| 647 |
"yearID": st.column_config.NumberColumn("Year", format="%d"),
|
| 648 |
"nameFirst": "First",
|
| 649 |
"nameLast": "Last",
|