Spaces:

stcoats
/

tspace

Sleeping

App Files Files Community

stcoats committed on Mar 13, 2025

Commit

b5ff3a4

1 Parent(s): 548bf4c

Add application file

Browse files

Files changed (1) hide show

app.py +69 -1

app.py CHANGED Viewed

@@ -1,3 +1,71 @@
 import streamlit as st
-st.title("If you see this, the Space works")

+import os
 import streamlit as st
+import duckdb
+import pandas as pd
+from huggingface_hub import hf_hub_download
+DB_PATH = "/data/ycsep.duckdb"
+REPO_ID = "stcoats/temp-duckdb-upload"
+FILENAME = "ycsep.duckdb"
+st.title("YCSEP Audio Dataset Viewer")
+# Step 1: Show storage status
+st.write("Checking persistent storage...")
+st.write(f"Expected DB location: `{DB_PATH}`")
+st.write(f"File exists: {os.path.exists(DB_PATH)}")
+# Step 2: Try downloading if needed
+if not os.path.exists(DB_PATH):
+    st.write("Database not found in persistent storage. Downloading from HF Hub...")
+    try:
+        path = hf_hub_download(
+            repo_id=REPO_ID,
+            repo_type="dataset",
+            filename=FILENAME,
+            local_dir="/data",
+            local_dir_use_symlinks=False,
+        )
+        st.success(f"Downloaded to {path}")
+    except Exception as e:
+        st.error(f"Download failed: {e}")
+        st.stop()
+# Step 3: Try loading DB
+try:
+    st.write("Connecting to DuckDB...")
+    con = duckdb.connect(DB_PATH, read_only=True)
+    st.write("Reading table...")
+    df = con.execute("SELECT * FROM data").df()
+    st.success(f"Loaded {len(df)} rows.")
+except Exception as e:
+    st.error(f"DuckDB load failed: {e}")
+    st.stop()
+# Step 4: Proceed with app
+query = st.text_input("Search text or speaker")
+if query:
+    filtered_df = df[df["text"].str.contains(query, case=False, na=False) |
+                     df["speaker"].astype(str).str.contains(query, case=False, na=False)]
+else:
+    filtered_df = df
+rows_per_page = 10
+total_rows = len(filtered_df)
+total_pages = (total_rows - 1) // rows_per_page + 1
+page = st.number_input("Page", min_value=1, max_value=total_pages, value=1)
+start = (page - 1) * rows_per_page
+end = start + rows_per_page
+page_df = filtered_df.iloc[start:end]
+for _, row in page_df.iterrows():
+    st.markdown(f"**Speaker:** {row['speaker']}")
+    st.markdown(f"**Text:** {row['text']}")
+    if isinstance(row['audio'], str) and row['audio'].startswith("http"):
+        st.audio(row['audio'], format="audio/mp3")
+    else:
+        st.warning("Audio not available")
+    st.markdown("---")