Spaces:

guramritpal-saggu-12
/

apollo

Running

App Files Files Community

guramritpal-saggu-12 committed 15 days ago

Commit

82eba10

verified ·

1 Parent(s): f729559

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +53 -36

src/streamlit_app.py CHANGED Viewed

@@ -12,11 +12,13 @@ from rank_bm25 import BM25Okapi
 from sentence_transformers import CrossEncoder
 from openai import OpenAI
-ROOT_DIR = Path(__file__).resolve().parents[1]
-DATA_PATH = 'ipl_knowledge_base.json'
-CSV_PATH = 'cricket_data.csv'
-VECTOR_DIR = ROOT_DIR / 'vector_store/ui_chroma'
 VECTOR_DIR.mkdir(parents=True, exist_ok=True)
 FETCH_K = 8
 CONTEXT_K = 4
 COLLECTION_NAME = 'ipl_rag_ui'
@@ -42,13 +44,18 @@ if api_key:
 @st.cache_data(show_spinner=False)
 def load_kb() -> Dict[str, Any]:
-    with DATA_PATH.open() as f:
         return json.load(f)
 @st.cache_data(show_spinner=False)
 def load_stats_df() -> pd.DataFrame:
-    df = pd.read_csv(CSV_PATH)
     df = df[df['Year'] != 'No stats'].copy()
     df['Year'] = pd.to_numeric(df['Year'])
     numeric_cols = [
@@ -373,33 +380,43 @@ def run_agent(question: str, kb: Dict[str, Any], stats_df: pd.DataFrame, collect
     return second.choices[0].message.content, contexts
-kb = load_kb()
-stats_df = load_stats_df()
-stats_payload = stats_df.to_json(orient='records')
-if st.sidebar.button('Build / refresh vector store', disabled=not api_key):
-    init_vector_store.clear()
-    st.sidebar.success('Rebuilt vector store')
-if not api_key:
-    st.warning('Provide an OpenAI API key to run the agent.')
-    st.stop()
-corpus, collection = init_vector_store(kb, stats_payload)
-query = st.text_area('Ask anything about IPL 2024 (matches, players, venues, tactics)', height=140)
-if st.button('Run query', disabled=not query.strip()):
-    with st.spinner('Calling vector DB + RAG agent...'):
-        answer, contexts = run_agent(query.strip(), kb, stats_df, collection, rerank_strategy)
-    st.success('Answer')
-    st.write(answer)
-    with st.expander('Retrieved context'):
-        for ctx in contexts:
-            sim = ctx.get('score', 0.0)
-            rerank_score = ctx.get('rerank_score')
-            suffix = f", rerank={rerank_score:.2f}" if rerank_score is not None else ''
-            st.markdown(f"**{ctx.get('type','doc')}::{ctx.get('id','unknown')}** (sim={sim:.2f}{suffix})")
-            st.write(ctx['text'])
-            st.divider()
-else:
-    st.info('Enter a query and click run to test the pipeline.')

 from sentence_transformers import CrossEncoder
 from openai import OpenAI
+# For Hugging Face Spaces: data files are expected in the same directory as this script (src/streamlit_app.py)
+SCRIPT_DIR = Path(__file__).parent.resolve()
+DATA_PATH = SCRIPT_DIR / 'ipl_knowledge_base.json'
+CSV_PATH = SCRIPT_DIR / 'cricket_data.csv'
+VECTOR_DIR = SCRIPT_DIR / 'vector_store'
 VECTOR_DIR.mkdir(parents=True, exist_ok=True)
 FETCH_K = 8
 CONTEXT_K = 4
 COLLECTION_NAME = 'ipl_rag_ui'
 @st.cache_data(show_spinner=False)  # result is cached via st.cache_data; the spinner is suppressed
 def load_kb() -> Dict[str, Any]:
+    """Load knowledge base JSON file."""
+    # Defensive wrap: Path() accepts either a str or a Path, so the open() below works whichever DATA_PATH is.
+    data_path = Path(DATA_PATH)
+    with open(data_path, 'r', encoding='utf-8') as f:  # explicit UTF-8 rather than the platform default encoding
         return json.load(f)  # parsed JSON is returned as-is; nothing here checks that the top level is a dict
 @st.cache_data(show_spinner=False)
 def load_stats_df() -> pd.DataFrame:
+    """Load and preprocess CSV stats."""
+    csv_path = Path(CSV_PATH)
+    df = pd.read_csv(csv_path)
     df = df[df['Year'] != 'No stats'].copy()
     df['Year'] = pd.to_numeric(df['Year'])
     numeric_cols = [
     return second.choices[0].message.content, contexts
+# Main execution: load the data files, build the vector store, then render the query UI.
+try:  # the whole app flow is wrapped so failures are rendered in the page by the handlers below
+    kb = load_kb()
+    stats_df = load_stats_df()
+    stats_payload = stats_df.to_json(orient='records')  # JSON string form of the stats, passed to init_vector_store
+    if st.sidebar.button('Build / refresh vector store', disabled=not api_key):  # button is disabled without an API key
+        init_vector_store.clear()  # presumably a Streamlit-cached function defined elsewhere; verify .clear() drops its cache
+        st.sidebar.success('Rebuilt vector store')  # NOTE(review): shown before init_vector_store is actually called again below
+    if not api_key:
+        st.warning('Provide an OpenAI API key to run the agent.')
+        st.stop()  # halt this script run; NOTE(review): confirm st.stop() is not intercepted by `except Exception` below
+    corpus, collection = init_vector_store(kb, stats_payload)  # corpus is not used again in this section
+    query = st.text_area('Ask anything about IPL 2024 (matches, players, venues, tactics)', height=140)
+    if st.button('Run query', disabled=not query.strip()):  # disabled while the query is empty or whitespace-only
+        with st.spinner('Calling vector DB + RAG agent...'):
+            answer, contexts = run_agent(query.strip(), kb, stats_df, collection, rerank_strategy)  # rerank_strategy is defined outside this section
+        st.success('Answer')
+        st.write(answer)
+        with st.expander('Retrieved context'):
+            for ctx in contexts:  # one rendered entry per retrieved context dict
+                sim = ctx.get('score', 0.0)  # falls back to 0.0 when no 'score' key is present
+                rerank_score = ctx.get('rerank_score')  # None when the key is absent
+                suffix = f", rerank={rerank_score:.2f}" if rerank_score is not None else ''  # rerank shown only when available
+                st.markdown(f"**{ctx.get('type','doc')}::{ctx.get('id','unknown')}** (sim={sim:.2f}{suffix})")
+                st.write(ctx['text'])  # indexed directly, so a context without 'text' raises KeyError (unlike the .get() calls above)
+                st.divider()
+    else:
+        st.info('Enter a query and click run to test the pipeline.')
+except FileNotFoundError as e:  # a missing file anywhere in the try body lands here, e.g. the JSON or CSV data files
+    st.error(f'Data file not found: {e}')
+    st.info(f'Looking for files in: {SCRIPT_DIR}')
+    st.info('Please ensure ipl_knowledge_base.json and cricket_data.csv are in the same directory as app.py')  # NOTE(review): message says app.py, but this commit's file is src/streamlit_app.py
+except Exception as e:  # catch-all; NOTE(review): message says "loading" but the try body also covers running a query
+    st.error(f'Error loading application: {e}')
+    import traceback  # imported locally, only on the error path
+    st.code(traceback.format_exc())  # show the full traceback in the page