Spaces:

SaviAnna
/

Video_Rag

Sleeping

App Files Files Community

SaviAnna committed on Jun 13, 2025

Commit

2da889a

1 Parent(s): e447f2f

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +94 -38

src/streamlit_app.py CHANGED Viewed

@@ -1,40 +1,96 @@
-import altair as alt
-import numpy as np
-import pandas as pd
 import streamlit as st
-"""
-# Welcome to Streamlit!
-Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
-If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
-forums](https://discuss.streamlit.io).
-In the meantime, below is an example of what you can do with just a few lines of code:
-"""
-num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
-num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
-indices = np.linspace(0, 1, num_points)
-theta = 2 * np.pi * num_turns * indices
-radius = indices
-x = radius * np.cos(theta)
-y = radius * np.sin(theta)
-df = pd.DataFrame({
-    "x": x,
-    "y": y,
-    "idx": indices,
-    "rand": np.random.randn(num_points),
-})
-st.altair_chart(alt.Chart(df, height=700, width=700)
-    .mark_point(filled=True)
-    .encode(
-        x=alt.X("x", axis=None),
-        y=alt.Y("y", axis=None),
-        color=alt.Color("idx", legend=None, scale=alt.Scale()),
-        size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
-    ))

+from dotenv import load_dotenv
 import streamlit as st
+import asyncio
+import os
+import json
+import re
+from lightrag import LightRAG, QueryParam
+from lightrag.llm.openai import gpt_4o_mini_complete, openai_embed
+load_dotenv()
+# ---------------------- Sources: parsing + search ----------------------
+def extract_dc_chunks(context_str):
+    """Extract the "Document Chunks(DC)" JSON payload from a context string.
+
+    The payload is the fenced ``json`` block that follows the
+    ``-----Document Chunks(DC)-----`` header.
+
+    Args:
+        context_str: Context text to search. Anything that is not a ``str``
+            (for example ``None``) is treated as having no chunks.
+
+    Returns:
+        The decoded list of chunk entries, or ``[]`` when the section is
+        missing, its JSON is malformed, or it does not decode to a list.
+    """
+    # re.search raises TypeError on non-string input; treat that as "no chunks".
+    if not isinstance(context_str, str):
+        return []
+    match = re.search(r'-----Document Chunks\(DC\)-----\s+```json\n(.*?)```', context_str, re.DOTALL)
+    if not match:
+        return []
+    try:
+        dc_chunks = json.loads(match.group(1))
+    except json.JSONDecodeError:
+        # A broken payload should only cost the source list, not the whole chat turn.
+        return []
+    # Callers iterate the result as a list of dicts; reject any other JSON shape.
+    return dc_chunks if isinstance(dc_chunks, list) else []
+def find_matches(dc_chunks, full_chunks_dict):
+    """Look up timestamp and file path for each retrieved chunk by its text.
+
+    Content is compared after stripping surrounding whitespace. When several
+    stored chunks share the same text, the first one in ``full_chunks_dict``
+    iteration order is used.
+
+    Args:
+        dc_chunks: Iterable of dicts carrying a ``"content"`` key. Entries
+            that are not dicts are skipped.
+        full_chunks_dict: Mapping of chunk id to a dict with ``"content"``,
+            ``"timestamp"`` and ``"file_path"`` keys.
+
+    Returns:
+        One ``{"timestamp", "file_path", "content"}`` dict per entry of
+        ``dc_chunks`` that has a match, in ``dc_chunks`` order.
+    """
+    # Index the store once instead of rescanning it for every retrieved chunk.
+    # setdefault keeps the first chunk seen for a given text (first-hit semantics).
+    chunks_by_content = {}
+    for chunk_data in full_chunks_dict.values():
+        if isinstance(chunk_data, dict):
+            # `or ""` also covers an explicit null content value.
+            chunks_by_content.setdefault((chunk_data.get("content") or "").strip(), chunk_data)
+    results = []
+    for dc in dc_chunks:
+        if not isinstance(dc, dict):
+            continue
+        dc_content = (dc.get("content") or "").strip()
+        chunk_data = chunks_by_content.get(dc_content)
+        if chunk_data is not None:
+            results.append({
+                "timestamp": chunk_data.get("timestamp"),
+                "file_path": chunk_data.get("file_path"),
+                "content": dc_content,
+            })
+    return results
+# Load every sermon chunk, together with its timestamp, once at import time
+with open("./sermons/kv_store_text_chunks_with_timestamps.json", mode="r", encoding="utf-8") as chunks_file:
+    FULL_CHUNKS_DICT = json.loads(chunks_file.read())
+# ---------------------- Streamlit UI ----------------------
+async def main():
+    """Render the chat page and handle at most one new user question per run.
+
+    Builds a LightRAG instance over ``./sermons``, replays the messages kept in
+    ``st.session_state.messages``, and, when the chat input holds a question,
+    queries LightRAG twice: once for the retrieval context only (used to look
+    up sources) and once for the answer. The answer and any matched sources
+    are rendered and appended to the session history.
+    """
+    st.title("LightRAG: Sermons Video Chat Bot BBG")
+    # Safe initialization of LightRAG (without saving to session_state)
+    # NOTE(review): a new instance is built and its storages initialized on
+    # every call to main() — confirm that cost is acceptable.
+    rag = LightRAG(
+        working_dir="./sermons",
+        embedding_func=openai_embed,
+        llm_model_func=gpt_4o_mini_complete
+    )
+    await rag.initialize_storages()
+    # Initialize message history
+    if "messages" not in st.session_state:
+        st.session_state.messages = []
+    # Display previous messages, each in its own chat bubble
+    for msg in st.session_state.messages:
+        with st.chat_message(msg["role"]):
+            st.markdown(msg["content"])
+    user_input = st.chat_input("What do you want to know?")
+    if user_input:
+        # Save the user's message
+        st.session_state.messages.append({"role": "user", "content": user_input})
+        with st.chat_message("user"):
+            st.markdown(user_input)
+        with st.chat_message("assistant"):
+            # Contextual query (bypasses cache)
+            # NOTE(review): the `<!--ctx-->` suffix presumably makes this query
+            # text differ from the answer query below so a cached reply is not
+            # reused — confirm against LightRAG's cache keying.
+            ctx_query = f"{user_input}\n<!--ctx-->"
+            ctx_param = QueryParam(mode="mix", only_need_context=True, top_k=3)
+            context_chunks = await rag.aquery(ctx_query, param=ctx_param)
+            # Debug output of the raw context to stdout
+            print(f"Context Chunks: {context_chunks}")
+            # Answer (cached)
+            ans_param = QueryParam(mode="mix")
+            answer = await rag.aquery(user_input, param=ans_param)
+            # Display and save the answer
+            st.markdown(answer)
+            st.session_state.messages.append({"role": "assistant", "content": answer})
+            # Source search: map the chunks listed in the context back to the
+            # timestamped chunks in FULL_CHUNKS_DICT
+            dc_chunks = extract_dc_chunks(context_chunks)
+            matched_sources = find_matches(dc_chunks, FULL_CHUNKS_DICT)
+            if matched_sources:
+                sources_md = "#### 📚 Sources:\n" + "\n".join(
+                    f"- **Time:** `{src['timestamp']}` | **File:** `{src['file_path']}`"
+                    for src in matched_sources
+                )
+                st.markdown(sources_md)
+                # Stored as a separate assistant message, so on replay the
+                # sources appear in their own chat bubble after the answer.
+                st.session_state.messages.append({"role": "assistant", "content": sources_md})
+# Run the asynchronous Streamlit app when this file is executed as a script
+if __name__ == "__main__":
+    asyncio.run(main())