Sami Ali committed on
Commit 25b6019 · 1 Parent(s): e0d25d8

add two files, one for streamlit and one for huggingface

Files changed (1)
streamlit_app.py +71 -0
streamlit_app.py ADDED
@@ -0,0 +1,71 @@
+from src.data_processor import DataProcessor
+from src.embedding import EmbeddingManager
+from src.vectorstore import VectorStore
+from src.download_data import download_pmc_docs
+from src.llm import LLM
+from tqdm import tqdm
+
+import streamlit as st
+
+@st.cache_resource(show_spinner="🔄 Building pipeline...")
+def load_pipeline():
+    limit = 2000
+    download_pmc_docs(limit=limit)
+    dp = DataProcessor()
+    chunks, document = dp.build()
+    chunks_list = [c.page_content for c in tqdm(chunks, desc='Chunking')]
+    embd = EmbeddingManager()
+    embd_model = embd.get_model()
+    chunks_embedding = embd.embed_texts(chunks_list)
+    vectorstore = VectorStore()
+    vectorstore.add_documents(chunks, chunks_embedding)
+    retriever = vectorstore.get_retriever(embd_model)
+    llm = LLM(retriever)
+    return llm
+
+
+if __name__ == '__main__':
+
+    st.set_page_config(
+        page_title="MedRAG: AI-Powered Biomedical Paper Search",
+        layout="wide",
+        page_icon="🧬",
+    )
+
+    st.title("🧬 MedRAG")
+    st.caption("Ask questions. Explore research. Ground answers in biomedical literature.")
+
+    llm = load_pipeline()
+
+    if "chat_history" not in st.session_state:
+        st.session_state.chat_history = []
+
+    for q, a in st.session_state.chat_history:
+        with st.chat_message("user"):
+            st.write(q)
+        with st.chat_message("assistant"):
+            st.write(a)
+
+    if query := st.chat_input("Type your biomedical question here..."):
+        with st.chat_message("user"):
+            st.write(query)
+
+        with st.chat_message("assistant"):
+            with st.spinner("🤖 Thinking... please wait"):
+                result = llm.invoke(query)
+                answer = result["result"]
+
+            sources = []
+
+            if result['source_documents']:
+                for doc in result['source_documents']:
+                    preview = doc.page_content[:200].replace("\n", " ")
+                    sources.append(preview + "...")
+            st.write(answer)
+
+            if sources:
+                with st.expander('📚 Sources'):
+                    for idx, src in enumerate(sources, 1):
+                        st.markdown(f"**{idx}.** {src}")
+
+        st.session_state.chat_history.append((query, answer))
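The only contract streamlit_app.py needs from src/llm.py is visible in the diff: load_pipeline() returns an object whose invoke(query) yields a dict with a "result" string and a "source_documents" list of chunk objects exposing page_content. That shape matches what a LangChain RetrievalQA-style chain returns, but since src/llm.py is not part of this commit, the sketch below is a hypothetical stand-in for that interface, not the repository's implementation; the constructor body and the retriever call are assumptions.

# Hypothetical sketch of the interface streamlit_app.py assumes from
# src/llm.py (not in this commit). Only the invoke() contract -- a dict
# with "result" and "source_documents" -- is inferred from the diff above.

class LLM:
    def __init__(self, retriever):
        # A LangChain-style retriever (itself a Runnable with .invoke()).
        self.retriever = retriever

    def invoke(self, query: str) -> dict:
        # Fetch supporting chunks, then answer from them. A real
        # implementation would prompt a chat model with the chunks here.
        docs = self.retriever.invoke(query)
        answer = "(placeholder answer grounded in the retrieved chunks)"
        return {"result": answer, "source_documents": docs}

With both files in place, the app starts with `streamlit run streamlit_app.py`. The first launch is slow by design: load_pipeline() downloads up to 2000 PMC documents, chunks and embeds them, and builds the vector store, after which @st.cache_resource keeps the pipeline in memory so reruns and new sessions reuse it instead of rebuilding.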