chburhan64 committed on
Commit
8539b6e
Β·
verified Β·
1 Parent(s): 296e195

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -82
app.py CHANGED
@@ -1,11 +1,9 @@
1
  import streamlit as st
2
  import os
3
  import time
4
- import matplotlib.pyplot as plt
5
- import pandas as pd
6
- import pdfplumber
7
-
8
  from dotenv import load_dotenv
 
 
9
  from langchain_groq import ChatGroq
10
  from langchain_community.vectorstores import FAISS
11
  from langchain_community.embeddings import HuggingFaceEmbeddings
@@ -19,12 +17,15 @@ from langchain_core.prompts import ChatPromptTemplate
19
  load_dotenv()
20
  groq_api_key = os.getenv("GROQ_API_KEY")
21
 
 
22
  st.set_page_config(page_title="Multi-Agent Research Assistant", layout="wide")
23
  st.title("πŸ€– Multi-Agent Research Assistant")
24
- st.markdown("Upload your PDF research paper and explore multiple intelligent agents: summarize, question-answer, extract visuals, translate, and more!")
25
 
26
- # Load models
27
  llm = ChatGroq(groq_api_key=groq_api_key, model_name="Llama3-8b-8192")
 
 
28
  embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
29
 
30
  # Prompt Templates
@@ -60,131 +61,127 @@ Translate the following content into {language}, preserving meaning and academic
60
  {content}
61
  """)
62
 
63
- # PDF processing
 
 
 
 
 
 
 
64
  def process_pdfs(uploaded_files):
65
  documents = []
66
  for file in uploaded_files:
67
- with pdfplumber.open(file) as pdf:
68
- text = "\n".join(page.extract_text() or "" for page in pdf.pages)
 
 
69
  documents.append(Document(page_content=text, metadata={"source": file.name}))
70
  splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
71
  return splitter.split_documents(documents)
72
 
 
73
  def create_vector_store(documents):
74
  return FAISS.from_documents(documents, embedding)
75
 
 
76
  def run_chain(chain, input_dict):
77
  return chain.invoke(input_dict)
78
 
 
79
  uploaded_files = st.file_uploader("πŸ“ Upload one or more PDF files", type=["pdf"], accept_multiple_files=True)
80
 
81
  if uploaded_files and st.button("πŸ“š Process Documents"):
82
- with st.spinner("Processing and embedding..."):
83
  documents = process_pdfs(uploaded_files)
84
  st.session_state.documents = documents
85
  st.session_state.vectorstore = create_vector_store(documents)
86
  st.success("βœ… Document vector store created!")
87
 
 
88
  if "documents" in st.session_state:
89
- st.subheader("πŸŽ“ Choose an agent task:")
90
- task = st.selectbox("Task:", [
91
  "Summarize document",
92
  "Identify research gaps",
93
  "Suggest research ideas",
94
  "Simulate a debate",
95
  "Generate citation",
96
- "Chat with Paper",
97
- "Generate Chart + Insight"
98
  ])
99
 
100
  if st.button("πŸš€ Run Agent"):
101
  with st.spinner("Running agents..."):
102
  docs = st.session_state.documents[:10]
103
- results = {}
104
 
105
  if task == "Summarize document":
106
  chain = create_stuff_documents_chain(llm, summary_prompt)
107
- summary = run_chain(chain, {"context": docs})
108
- st.session_state["last_agent_output"] = summary
109
 
110
  elif task == "Identify research gaps":
111
- summary = run_chain(create_stuff_documents_chain(llm, summary_prompt), {"context": docs})
112
- gaps = run_chain(LLMChain(llm=llm, prompt=gap_prompt), {"summary": summary})
113
- st.session_state["last_agent_output"] = gaps
 
114
 
115
  elif task == "Suggest research ideas":
116
- summary = run_chain(create_stuff_documents_chain(llm, summary_prompt), {"context": docs})
117
- gaps = run_chain(LLMChain(llm=llm, prompt=gap_prompt), {"summary": summary})
118
- ideas = run_chain(LLMChain(llm=llm, prompt=idea_prompt), {"gaps": gaps})
119
- st.session_state["last_agent_output"] = ideas
 
 
120
 
121
  elif task == "Simulate a debate":
122
- summary = run_chain(create_stuff_documents_chain(llm, summary_prompt), {"context": docs})
123
- debate = run_chain(LLMChain(llm=llm, prompt=debate_prompt), {"summary": summary})
124
- st.session_state["last_agent_output"] = debate
 
125
 
126
  elif task == "Generate citation":
127
- citation_chain = create_stuff_documents_chain(llm, translate_prompt)
128
- citation = run_chain(citation_chain, {"context": docs})
129
- st.session_state["last_agent_output"] = citation
130
-
131
- elif task == "Chat with Paper":
132
- user_question = st.text_input("Ask a question about the paper:")
133
- if user_question:
134
- qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=st.session_state.vectorstore.as_retriever())
135
- answer = qa_chain.run(user_question)
136
- st.session_state["last_agent_output"] = answer
137
-
138
- elif task == "Generate Chart + Insight":
139
- numbers = []
140
- for doc in docs:
141
- for line in doc.page_content.split("\n"):
142
- for word in line.split():
143
- try:
144
- num = float(word)
145
- numbers.append(num)
146
- except:
147
- pass
148
- if numbers:
149
- fig, ax = plt.subplots()
150
- pd.Series(numbers[:20]).plot(kind="bar", ax=ax)
151
- st.pyplot(fig)
152
- explain_prompt = ChatPromptTemplate.from_template("Analyze this data: {data}")
153
- insight = run_chain(LLMChain(llm=llm, prompt=explain_prompt), {"data": numbers[:20]})
154
- st.session_state["last_agent_output"] = insight
155
- else:
156
- st.write("No numeric data found.")
157
-
158
- # Display Output
159
  if "last_agent_output" in st.session_state:
160
- st.markdown("### πŸ€– Agent Output")
161
- st.write(st.session_state["last_agent_output"])
162
-
163
- # Feedback agent (simple RLHF prototype)
164
- st.markdown("#### πŸ’¬ Was this helpful?")
165
- col1, col2 = st.columns(2)
166
- if col1.button("πŸ‘ Yes"):
167
- with open("feedback_log.csv", "a") as f:
168
- f.write(f"{task},Yes\n")
169
- st.success("Thanks for your feedback!")
170
- if col2.button("πŸ‘Ž No"):
171
- with open("feedback_log.csv", "a") as f:
172
- f.write(f"{task},No\n")
173
- st.info("Thanks! We'll improve it.")
174
-
175
- # Translation Option
176
- if st.toggle("🌍 Translate the response?"):
177
  default_languages = ["Spanish", "French", "German", "Chinese", "Urdu", "Other"]
178
- selected_language = st.selectbox("Choose language:", default_languages)
179
  if selected_language == "Other":
180
- user_language = st.text_input("Enter language:")
181
  else:
182
  user_language = selected_language
 
183
  if user_language:
 
 
 
 
 
184
  translate_chain = LLMChain(llm=llm, prompt=translate_prompt)
185
- content = st.session_state["last_agent_output"]
186
- if isinstance(content, dict):
187
- content = "\n".join(str(v) for v in content.values())
188
- translated = translate_chain.invoke({"language": user_language, "content": content})
 
189
  st.markdown(f"### 🌐 Translated Response ({user_language})")
190
  st.write(translated)
 
1
  import streamlit as st
2
  import os
3
  import time
 
 
 
 
4
  from dotenv import load_dotenv
5
+ import PyPDF2
6
+
7
  from langchain_groq import ChatGroq
8
  from langchain_community.vectorstores import FAISS
9
  from langchain_community.embeddings import HuggingFaceEmbeddings
 
17
  load_dotenv()
18
  groq_api_key = os.getenv("GROQ_API_KEY")
19
 
20
+ # Streamlit UI setup
21
  st.set_page_config(page_title="Multi-Agent Research Assistant", layout="wide")
22
  st.title("πŸ€– Multi-Agent Research Assistant")
23
+ st.markdown("Enhance your research process with intelligent summarization, critique, debate, translation, citation, and interactive Q&A. Upload a research paper and let our agents do the thinking!")
24
 
25
+ # Load Groq LLM (Llama3)
26
  llm = ChatGroq(groq_api_key=groq_api_key, model_name="Llama3-8b-8192")
27
+
28
+ # Load embedding model
29
  embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
30
 
31
  # Prompt Templates
 
61
  {content}
62
  """)
63
 
64
+ citation_prompt = ChatPromptTemplate.from_template("""
65
+ Generate an APA-style citation based on the document content:
66
+ <context>
67
+ {context}
68
+ </context>
69
+ """)
70
+
71
+ # Extract & process PDFs
72
  def process_pdfs(uploaded_files):
73
  documents = []
74
  for file in uploaded_files:
75
+ reader = PyPDF2.PdfReader(file)
76
+ text = ""
77
+ for page in reader.pages:
78
+ text += page.extract_text() or ""
79
  documents.append(Document(page_content=text, metadata={"source": file.name}))
80
  splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
81
  return splitter.split_documents(documents)
82
 
83
+ # Create vector store
84
  def create_vector_store(documents):
85
  return FAISS.from_documents(documents, embedding)
86
 
87
+ # Chain runner helpers
88
  def run_chain(chain, input_dict):
89
  return chain.invoke(input_dict)
90
 
91
+ # File uploader
92
  uploaded_files = st.file_uploader("πŸ“ Upload one or more PDF files", type=["pdf"], accept_multiple_files=True)
93
 
94
  if uploaded_files and st.button("πŸ“š Process Documents"):
95
+ with st.spinner("Processing documents and generating vector store..."):
96
  documents = process_pdfs(uploaded_files)
97
  st.session_state.documents = documents
98
  st.session_state.vectorstore = create_vector_store(documents)
99
  st.success("βœ… Document vector store created!")
100
 
101
+ # Agent Activation
102
  if "documents" in st.session_state:
103
+ st.subheader("πŸŽ“ Master Agent: What would you like me to do?")
104
+ task = st.selectbox("Choose a task:", [
105
  "Summarize document",
106
  "Identify research gaps",
107
  "Suggest research ideas",
108
  "Simulate a debate",
109
  "Generate citation",
110
+ "Chat with paper"
 
111
  ])
112
 
113
  if st.button("πŸš€ Run Agent"):
114
  with st.spinner("Running agents..."):
115
  docs = st.session_state.documents[:10]
116
+ output = ""
117
 
118
  if task == "Summarize document":
119
  chain = create_stuff_documents_chain(llm, summary_prompt)
120
+ output = run_chain(chain, {"context": docs})
 
121
 
122
  elif task == "Identify research gaps":
123
+ chain1 = create_stuff_documents_chain(llm, summary_prompt)
124
+ summary = run_chain(chain1, {"context": docs})
125
+ chain2 = LLMChain(llm=llm, prompt=gap_prompt)
126
+ output = run_chain(chain2, {"summary": summary})
127
 
128
  elif task == "Suggest research ideas":
129
+ chain1 = create_stuff_documents_chain(llm, summary_prompt)
130
+ summary = run_chain(chain1, {"context": docs})
131
+ chain2 = LLMChain(llm=llm, prompt=gap_prompt)
132
+ gaps = run_chain(chain2, {"summary": summary})
133
+ chain3 = LLMChain(llm=llm, prompt=idea_prompt)
134
+ output = run_chain(chain3, {"gaps": gaps})
135
 
136
  elif task == "Simulate a debate":
137
+ chain = create_stuff_documents_chain(llm, summary_prompt)
138
+ summary = run_chain(chain, {"context": docs})
139
+ debate_chain = LLMChain(llm=llm, prompt=debate_prompt)
140
+ output = run_chain(debate_chain, {"summary": summary})
141
 
142
  elif task == "Generate citation":
143
+ citation_chain = create_stuff_documents_chain(llm, citation_prompt)
144
+ output = run_chain(citation_chain, {"context": docs})
145
+
146
+ elif task == "Chat with paper":
147
+ query = st.text_input("πŸ’¬ Ask a question about the paper:")
148
+ if query:
149
+ retriever = st.session_state.vectorstore.as_retriever()
150
+ qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
151
+ output = qa_chain.run(query)
152
+
153
+ if output:
154
+ st.session_state["last_agent_output"] = output
155
+
156
+ # Final Display Section with Translation Option
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
  if "last_agent_output" in st.session_state:
158
+ output = st.session_state["last_agent_output"]
159
+
160
+ translate_toggle = st.toggle("🌍 Translate the response?")
161
+
162
+ if not translate_toggle:
163
+ st.markdown("### πŸ€– Agent Response")
164
+ st.write(output)
165
+
166
+ if translate_toggle:
 
 
 
 
 
 
 
 
167
  default_languages = ["Spanish", "French", "German", "Chinese", "Urdu", "Other"]
168
+ selected_language = st.selectbox("Choose translation language:", default_languages)
169
  if selected_language == "Other":
170
+ user_language = st.text_input("Please enter your desired language:", key="custom_lang")
171
  else:
172
  user_language = selected_language
173
+
174
  if user_language:
175
+ if isinstance(output, dict):
176
+ combined_text = "\n\n".join(str(v) for v in output.values())
177
+ else:
178
+ combined_text = str(output)
179
+
180
  translate_chain = LLMChain(llm=llm, prompt=translate_prompt)
181
+ translated = translate_chain.invoke({
182
+ "language": user_language,
183
+ "content": combined_text
184
+ })
185
+
186
  st.markdown(f"### 🌐 Translated Response ({user_language})")
187
  st.write(translated)