chburhan64 committed on
Commit
ca7640e
·
verified ·
1 Parent(s): 2947fa1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +127 -45
app.py CHANGED
@@ -18,76 +18,158 @@ load_dotenv()
18
  groq_api_key = os.getenv("GROQ_API_KEY")
19
 
20
  # Streamlit UI setup
21
- st.set_page_config(page_title="Document Q&A with Llama3")
22
- st.title("📄 Document Q&A with Llama3 (via Groq)")
 
23
 
24
  # Load Groq LLM (Llama3)
25
  llm = ChatGroq(groq_api_key=groq_api_key, model_name="Llama3-8b-8192")
26
 
27
- # Prompt template
28
- prompt = ChatPromptTemplate.from_template("""
29
- Answer the question based only on the provided context.
30
 
 
 
 
31
  <context>
32
  {context}
33
  </context>
 
 
 
 
34
 
35
- Question: {input}
36
  """)
37
 
38
- # Use HuggingFace Embeddings
39
- embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
 
 
 
 
 
 
 
 
 
 
40
 
41
- # Function to extract text and split into chunks
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  def process_pdfs(uploaded_files):
43
  documents = []
44
  for file in uploaded_files:
45
- pdf_reader = PyPDF2.PdfReader(file)
46
  text = ""
47
- for page in pdf_reader.pages:
48
  text += page.extract_text() or ""
49
  documents.append(Document(page_content=text, metadata={"source": file.name}))
50
-
51
  splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
52
  return splitter.split_documents(documents)
53
 
54
- # Function to build FAISS index
55
  def create_vector_store(documents):
56
- vectorstore = FAISS.from_documents(documents, embedding)
57
- return vectorstore
 
 
 
58
 
59
  # File uploader
60
  uploaded_files = st.file_uploader("📁 Upload one or more PDF files", type=["pdf"], accept_multiple_files=True)
61
 
62
- # Button to process files
63
  if uploaded_files and st.button("📚 Process Documents"):
64
- with st.spinner("Processing documents..."):
65
  documents = process_pdfs(uploaded_files)
66
- st.session_state.vectors = create_vector_store(documents)
67
- st.success("✅ Document vector store created!")
68
-
69
- # Question input
70
- query = st.text_input("💬 Ask a question about the uploaded documents")
71
-
72
- # If user asks a question
73
- if query and "vectors" in st.session_state:
74
- with st.spinner("Generating answer..."):
75
- document_chain = create_stuff_documents_chain(llm, prompt)
76
- retriever = st.session_state.vectors.as_retriever()
77
- retrieval_chain = create_retrieval_chain(retriever, document_chain)
78
-
79
- start = time.process_time()
80
- response = retrieval_chain.invoke({'input': query})
81
- end = time.process_time()
82
-
83
- st.markdown("### ✅ Answer:")
84
- st.write(response['answer'])
85
- st.markdown(f"⏱️ Response time: {end - start:.2f} seconds")
86
-
87
- with st.expander("🔍 Relevant Document Chunks"):
88
- for i, doc in enumerate(response.get("context", [])):
89
- st.write(doc.page_content)
90
- st.write("---")
91
-
92
- elif query and "vectors" not in st.session_state:
93
- st.warning("⚠️ Please upload and process PDF documents first.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  groq_api_key = os.getenv("GROQ_API_KEY")
19
 
20
  # Streamlit UI setup
21
+ st.set_page_config(page_title="Multi-Agent Research Assistant", layout="wide")
22
+ st.title("🤖 Multi-Agent Research Assistant")
23
+ st.markdown("Enhance your research process with intelligent summarization, critique, debate, translation, and citation. Upload a research paper and let our agents do the thinking!")
24
 
25
  # Load Groq LLM (Llama3)
26
  llm = ChatGroq(groq_api_key=groq_api_key, model_name="Llama3-8b-8192")
27
 
28
+ # Load embedding model
29
+ embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
 
30
 
31
+ # Prompt Templates
32
+ summary_prompt = ChatPromptTemplate.from_template("""
33
+ You are a helpful assistant. Summarize the following document clearly and accurately:
34
  <context>
35
  {context}
36
  </context>
37
+ """)
38
+
39
+ gap_prompt = ChatPromptTemplate.from_template("""
40
+ Analyze the following summary and identify key research gaps, unanswered questions, or limitations:
41
 
42
+ {summary}
43
  """)
44
 
45
+ idea_prompt = ChatPromptTemplate.from_template("""
46
+ Given the research gaps:
47
+ {gaps}
48
+
49
+ Suggest 2-3 original research project ideas or questions that address these gaps. Explain why they are valuable.
50
+ """)
51
+
52
+ debate_prompt = ChatPromptTemplate.from_template("""
53
+ Act as two researchers discussing a paper.
54
+
55
+ Supporter: Defends the core idea of the document.
56
+ Critic: Challenges its assumptions, methods, or impact.
57
 
58
+ Use the following summary as reference:
59
+ {summary}
60
+
61
+ Generate a short conversation between them.
62
+ """)
63
+
64
+ translate_prompt = ChatPromptTemplate.from_template("""
65
+ Translate the following content into {language}, preserving meaning and academic tone:
66
+
67
+ {content}
68
+ """)
69
+
70
+ citation_prompt = ChatPromptTemplate.from_template("""
71
+ Generate an APA-style citation based on the document content:
72
+
73
+ {content}
74
+ """)
75
+
76
+ # Extract & process PDFs
77
  def process_pdfs(uploaded_files):
78
  documents = []
79
  for file in uploaded_files:
80
+ reader = PyPDF2.PdfReader(file)
81
  text = ""
82
+ for page in reader.pages:
83
  text += page.extract_text() or ""
84
  documents.append(Document(page_content=text, metadata={"source": file.name}))
 
85
  splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
86
  return splitter.split_documents(documents)
87
 
88
+ # Create vector store
89
  def create_vector_store(documents):
90
+ return FAISS.from_documents(documents, embedding)
91
+
92
+ # Chain runner helpers
93
+ def run_chain(chain, input_dict):
94
+ return chain.invoke(input_dict)
95
 
96
  # File uploader
97
  uploaded_files = st.file_uploader("📁 Upload one or more PDF files", type=["pdf"], accept_multiple_files=True)
98
 
 
99
  if uploaded_files and st.button("📚 Process Documents"):
100
+ with st.spinner("Processing documents and generating vector store..."):
101
  documents = process_pdfs(uploaded_files)
102
+ st.session_state.documents = documents
103
+ st.session_state.vectorstore = create_vector_store(documents)
104
+ st.success("✅ Document vector store created!")
105
+
106
+ # Agent Activation
107
+ if "documents" in st.session_state:
108
+ st.subheader("🎓 Master Agent: What would you like me to do?")
109
+ task = st.selectbox("Choose a task:", [
110
+ "Summarize document",
111
+ "Identify research gaps",
112
+ "Suggest research ideas",
113
+ "Simulate a debate",
114
+ "Translate summary",
115
+ "Generate citation"
116
+ ])
117
+ user_language = st.selectbox("🌍 Choose translation language (only for Translate task):", ["Spanish", "French", "German", "Chinese", "Urdu"])
118
+ if st.button("🚀 Run Agent"):
119
+ with st.spinner("Running agents..."):
120
+ context = "\n".join([doc.page_content for doc in st.session_state.documents[:10]])
121
+ results = {}
122
+
123
+ # Summarization
124
+ if task == "Summarize document":
125
+ chain = create_stuff_documents_chain(llm, summary_prompt)
126
+ summary = run_chain(chain, {"context": context})
127
+ results["summary"] = summary
128
+ st.markdown("### 📝 Summary")
129
+ st.write(summary)
130
+
131
+ # Gap analysis
132
+ elif task == "Identify research gaps":
133
+ chain1 = create_stuff_documents_chain(llm, summary_prompt)
134
+ summary = run_chain(chain1, {"context": context})
135
+ chain2 = create_stuff_documents_chain(llm, gap_prompt)
136
+ gaps = run_chain(chain2, {"summary": summary})
137
+ results["gaps"] = gaps
138
+ st.markdown("### 🔍 Identified Gaps")
139
+ st.write(gaps)
140
+
141
+ # Idea generation
142
+ elif task == "Suggest research ideas":
143
+ chain1 = create_stuff_documents_chain(llm, summary_prompt)
144
+ summary = run_chain(chain1, {"context": context})
145
+ chain2 = create_stuff_documents_chain(llm, gap_prompt)
146
+ gaps = run_chain(chain2, {"summary": summary})
147
+ chain3 = create_stuff_documents_chain(llm, idea_prompt)
148
+ ideas = run_chain(chain3, {"gaps": gaps})
149
+ st.markdown("### 💡 Research Ideas")
150
+ st.write(ideas)
151
+
152
+ # Debate agent
153
+ elif task == "Simulate a debate":
154
+ chain = create_stuff_documents_chain(llm, summary_prompt)
155
+ summary = run_chain(chain, {"context": context})
156
+ debate_chain = create_stuff_documents_chain(llm, debate_prompt)
157
+ debate = run_chain(debate_chain, {"summary": summary})
158
+ st.markdown("### 🎭 Debate")
159
+ st.write(debate)
160
+
161
+ # Translate agent
162
+ elif task == "Translate summary":
163
+ chain = create_stuff_documents_chain(llm, summary_prompt)
164
+ summary = run_chain(chain, {"context": context})
165
+ translate_chain = create_stuff_documents_chain(llm, translate_prompt)
166
+ translated = run_chain(translate_chain, {"language": user_language, "content": summary})
167
+ st.markdown(f"### 🌐 Translated Summary ({user_language})")
168
+ st.write(translated)
169
+
170
+ # Citation agent
171
+ elif task == "Generate citation":
172
+ citation_chain = create_stuff_documents_chain(llm, citation_prompt)
173
+ citation = run_chain(citation_chain, {"content": context})
174
+ st.markdown("### 📌 APA Citation")
175
+ st.code(citation, language="markdown")