SaviAnna committed on
Commit
2da889a
·
1 Parent(s): e447f2f

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +94 -38
src/streamlit_app.py CHANGED
@@ -1,40 +1,96 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
4
  import streamlit as st
 
 
 
 
5
 
6
- """
7
- # Welcome to Streamlit!
8
-
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
-
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
15
-
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import load_dotenv
 
 
2
  import streamlit as st
3
+ import asyncio
4
+ import os
5
+ import json
6
+ import re
7
 
8
+ from lightrag import LightRAG, QueryParam
9
+ from lightrag.llm.openai import gpt_4o_mini_complete, openai_embed
10
+
11
+ load_dotenv()
12
+
13
+ # ---------------------- Sources: parsing + search ----------------------
14
+
15
# Captures the fenced JSON payload that follows the "Document Chunks(DC)" header.
_DC_SECTION_RE = re.compile(
    r'-----Document Chunks\(DC\)-----\s+```json\n(.*?)```',
    re.DOTALL,
)


def extract_dc_chunks(context_str):
    """Extract the "Document Chunks(DC)" JSON list from a context string.

    The context is searched for a ``-----Document Chunks(DC)-----`` header
    followed by a fenced ``json`` code block; the block's content is parsed
    as JSON.

    Args:
        context_str: Text that may contain the DC section. Non-string values
            (for example ``None``) are tolerated.

    Returns:
        The parsed list of chunks, or an empty list when the input is not a
        string, the section is absent, the payload is not valid JSON, or the
        payload is not a JSON array.
    """
    if not isinstance(context_str, str):
        return []

    match = _DC_SECTION_RE.search(context_str)
    if match is None:
        return []

    try:
        chunks = json.loads(match.group(1))
    except json.JSONDecodeError:
        # A truncated or malformed payload means "no sources", not a crash.
        return []

    # Callers iterate the result as a list of dicts; anything else is unusable.
    return chunks if isinstance(chunks, list) else []
21
+
22
def _stripped_content(entry):
    """Return the entry's ``content`` stripped of whitespace, or ``""`` if unusable."""
    if not isinstance(entry, dict):
        return ""
    content = entry.get("content")
    return content.strip() if isinstance(content, str) else ""


def find_matches(dc_chunks, full_chunks_dict):
    """Look up timestamp and file path for each retrieved document chunk.

    A retrieved chunk matches a stored chunk when their ``content`` strings
    are equal after stripping surrounding whitespace. When several stored
    chunks share the same content, the first one in dictionary order wins.

    Args:
        dc_chunks: Iterable of dicts, each expected to carry a ``content`` key.
        full_chunks_dict: Mapping of chunk id to a dict with ``content``,
            ``timestamp`` and ``file_path`` keys.

    Returns:
        A list with one ``{"timestamp", "file_path", "content"}`` dict per
        matched retrieved chunk, in the order of ``dc_chunks``. Entries whose
        content is missing, empty, or not a string are skipped.
    """
    # Build the lookup once so each retrieved chunk costs O(1) instead of a
    # scan over every stored chunk. setdefault keeps the first occurrence.
    by_content = {}
    for chunk_data in full_chunks_dict.values():
        key = _stripped_content(chunk_data)
        if key:
            by_content.setdefault(key, chunk_data)

    results = []
    for dc in dc_chunks:
        dc_content = _stripped_content(dc)
        # Empty content carries no information; matching on it would attach
        # an arbitrary source to the answer.
        if not dc_content:
            continue
        chunk_data = by_content.get(dc_content)
        if chunk_data is None:
            continue
        results.append({
            "timestamp": chunk_data.get("timestamp"),
            "file_path": chunk_data.get("file_path"),
            "content": dc_content,
        })
    return results
35
+
36
# Read every sermon chunk (with its timestamp metadata) once, at import time.
with open(
    "./sermons/kv_store_text_chunks_with_timestamps.json",
    mode="r",
    encoding="utf-8",
) as chunks_file:
    FULL_CHUNKS_DICT = json.loads(chunks_file.read())
39
+
40
+ # ---------------------- Streamlit UI ----------------------
41
+
42
async def main():
    """Render the chat page and, if the user asked something, answer it.

    Each run builds a LightRAG instance over ``./sermons``, replays the
    messages stored in ``st.session_state``, and reads the chat input. For a
    submitted question it issues two queries: one returning only the retrieval
    context (used to list sources) and one returning the answer. The answer
    and any matched sources are displayed and appended to the stored history.
    """
    st.title("LightRAG: Sermons Video Chat Bot BBG")

    # LightRAG is constructed here on every run instead of being kept in
    # session_state.
    rag = LightRAG(
        working_dir="./sermons",
        embedding_func=openai_embed,
        llm_model_func=gpt_4o_mini_complete,
    )
    await rag.initialize_storages()

    # Create the message history on first use, then replay it.
    if "messages" not in st.session_state:
        st.session_state.messages = []
    history = st.session_state.messages

    for past in history:
        with st.chat_message(past["role"]):
            st.markdown(past["content"])

    user_input = st.chat_input("What do you want to know?")
    if not user_input:
        return

    # Record and echo the user's question.
    history.append({"role": "user", "content": user_input})
    with st.chat_message("user"):
        st.markdown(user_input)

    with st.chat_message("assistant"):
        # Context-only query; the marker suffix makes the query text differ
        # from the answer query so the two do not share a cache entry.
        context_chunks = await rag.aquery(
            f"{user_input}\n<!--ctx-->",
            param=QueryParam(mode="mix", only_need_context=True, top_k=3),
        )
        print(f"Context Chunks: {context_chunks}")

        # Answer query (cached).
        answer = await rag.aquery(user_input, param=QueryParam(mode="mix"))

        # Show the answer and keep it in the history.
        st.markdown(answer)
        history.append({"role": "assistant", "content": answer})

        # Map the retrieved chunks back to their timestamp / file metadata.
        matched_sources = find_matches(
            extract_dc_chunks(context_chunks),
            FULL_CHUNKS_DICT,
        )
        if matched_sources:
            source_lines = [
                f"- **Time:** `{src['timestamp']}` | **File:** `{src['file_path']}`"
                for src in matched_sources
            ]
            sources_md = "#### 📚 Sources:\n" + "\n".join(source_lines)
            st.markdown(sources_md)
            history.append({"role": "assistant", "content": sources_md})
94
# Entry point: when executed as a script, run the async Streamlit page
# coroutine to completion via asyncio.run.
if __name__ == "__main__":
    asyncio.run(main())