QuantumLearner committed on
Commit
c443f96
·
verified ·
1 Parent(s): 0feb25a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -157
app.py CHANGED
@@ -1,4 +1,9 @@
1
- import os
 
 
 
 
 
2
  import io
3
  import uuid
4
  import asyncio
@@ -10,68 +15,13 @@ import nest_asyncio
10
  from fpdf import FPDF
11
  from gpt_researcher import GPTResearcher
12
 
13
-
14
- # -------------------------
15
- # Page & global configuration
16
- # -------------------------
17
  st.set_page_config(layout="wide", page_title="GPT Researcher")
18
 
19
- # Base providers & defaults
20
- os.environ.setdefault("LLM_PROVIDER", "openai")
21
- os.environ.setdefault("EMBEDDING_PROVIDER", "openai")
22
- os.environ.setdefault("EMBEDDING_MODEL", "text-embedding-3-small")
23
-
24
- # IMPORTANT: gpt_researcher expects "<provider>:<model>" for SMART_LLM / FAST_LLM / STRATEGIC_LLM
25
- _provider = os.environ.get("LLM_PROVIDER", "openai")
26
- _default_strategic = "gpt-4o"
27
- _default_smart = "gpt-4o-mini"
28
-
29
- # Seed all the variants some releases look for
30
- def _seed_llm_env(strategic_model: str, smart_model: str, provider: str = _provider):
31
- strategic = f"{provider}:{strategic_model}"
32
- smart = f"{provider}:{smart_model}"
33
- # Required (newer versions check these):
34
- os.environ["STRATEGIC_LLM"] = strategic
35
- os.environ["SMART_LLM"] = smart
36
- os.environ["FAST_LLM"] = smart # alias some builds use
37
-
38
- # Back-compat aliases some releases read:
39
- os.environ["STRATEGY_LLM"] = strategic
40
- os.environ["STRATEGIC_MODEL"] = strategic_model
41
- os.environ["SMART_MODEL"] = smart_model
42
-
43
- # Embeddings (some builds accept both split and combined)
44
- os.environ["EMBEDDING"] = f"{os.environ.get('EMBEDDING_PROVIDER','openai')}:{os.environ.get('EMBEDDING_MODEL','text-embedding-3-small')}"
45
-
46
- _seed_llm_env(_default_strategic, _default_smart)
47
-
48
- # Allow asyncio.run inside Streamlit
49
  nest_asyncio.apply()
50
 
51
-
52
- # -------------------------
53
- # Small helpers
54
- # -------------------------
55
- def _apply_model_env(strategic_model: str, smart_model: str):
56
- """Apply model choices in the provider-qualified format required by gpt_researcher."""
57
- _seed_llm_env(strategic_model, smart_model, provider=os.environ.get("LLM_PROVIDER", "openai"))
58
-
59
- def _clean_logs(text: str) -> str:
60
- """Optionally hide noisy lines about unavailable models, keep everything else."""
61
- if not text:
62
- return text
63
- bad_bits = [
64
- "The model `o1-preview` does not exist",
65
- "`o1-preview` does not exist",
66
- "model_not_found",
67
- ]
68
- lines = []
69
- for line in text.splitlines():
70
- if any(b in line for b in bad_bits):
71
- continue
72
- lines.append(line)
73
- return "\n".join(lines)
74
-
75
  class PDF(FPDF):
76
  def header(self):
77
  self.set_font("Arial", "B", 12)
@@ -83,53 +33,58 @@ class PDF(FPDF):
83
  self.cell(0, 10, f"Page {self.page_no()}", 0, 0, "C")
84
 
85
  def create_pdf(report_text: str) -> str:
86
- """Write PDF to a unique, writable temp path and return the path."""
87
  pdf_path = f"/tmp/research_report_{uuid.uuid4().hex}.pdf"
88
  pdf = PDF()
89
  pdf.add_page()
90
  pdf.set_auto_page_break(auto=True, margin=15)
91
  pdf.set_font("Arial", size=12)
92
- # FPDF is Latin-1; degrade gracefully
93
  for line in report_text.split("\n"):
 
94
  pdf.multi_cell(0, 10, line.encode("latin-1", "replace").decode("latin-1"))
95
  pdf.output(pdf_path, "F")
96
  return pdf_path
97
 
98
- async def run_research_streaming(
99
- query: str,
100
- report_type: str,
101
- report_source: str,
102
- sources: list,
103
- logs_placeholder
104
- ):
105
  """
106
- Run research and stream stdout to the provided placeholder.
107
  Returns (report_text, final_logs).
108
  """
109
  buf = io.StringIO()
110
 
111
  with redirect_stdout(buf):
112
- # For local/doc research, set DOC_PATH and ensure it exists
113
  if report_source == "local":
114
- os.environ["DOC_PATH"] = "./uploads"
115
- os.makedirs("uploads", exist_ok=True)
116
- researcher = GPTResearcher(query=query, report_type=report_type, report_source=report_source)
117
  else:
118
  researcher = GPTResearcher(query=query, report_type=report_type, source_urls=sources)
119
 
120
- # Kick off the task so we can poll logs while it runs
121
  task = asyncio.create_task(researcher.conduct_research())
122
 
123
- # Stream logs while the task runs
124
  while not task.done():
125
  await asyncio.sleep(0.5)
126
- logs_placeholder.code(_clean_logs(buf.getvalue()) or "Starting…")
 
 
 
 
 
 
127
 
128
- # Ensure exceptions are raised if any
129
  await task
130
 
131
- # One final refresh of logs after conduct_research finishes
132
- logs_placeholder.code(_clean_logs(buf.getvalue()) or "Finalizing…")
 
 
 
 
 
133
 
134
  # Write the report
135
  report_text = await researcher.write_report()
@@ -137,62 +92,41 @@ async def run_research_streaming(
137
  final_logs = buf.getvalue()
138
  return report_text, final_logs
139
 
140
-
141
- # -------------------------
142
- # UI
143
- # -------------------------
144
  st.title("GPT Researcher")
145
  st.markdown(
146
  """
147
- GPT Researcher is an autonomous agent for comprehensive online or local-document research,
148
- producing detailed, factual reports.
149
  """
150
  )
151
 
152
  with st.expander("Why Use GPT Researcher?", expanded=False):
153
  st.markdown(
154
  """
155
- - **Objective & Factual**: Focused on accurate information.
156
- - **Time-Efficient**: Automates the heavy lifting of research.
157
- - **Up-to-Date**: Pulls from the web or your uploaded files.
158
- - **Long-Form Reports**: Capable of 2,000+ word outputs.
159
  """
160
  )
161
 
162
- # Label styling
163
- st.markdown(
164
- """
165
- <style>
166
- .big-green-font { font-size:20px !important; font-weight:bold; color:green; margin-bottom:-10px; }
167
- .stTextInput > div > input { margin-top:-25px; }
168
- </style>
169
- """,
170
- unsafe_allow_html=True,
171
  )
172
 
173
- st.markdown('<p class="big-green-font">Enter your research query:</p>', unsafe_allow_html=True)
174
- default_query = "Why is the Stock Price of Nvidia Soaring?"
175
- user_query = st.text_input("", default_query, help="Type your research question or topic.")
176
-
177
  current_date = datetime.now().strftime("%B %Y")
178
  final_query = f"{user_query} Current Date is {current_date}" if user_query else ""
179
 
180
  st.sidebar.title("Research Settings")
181
 
182
- with st.sidebar.expander("How to Use", expanded=False):
183
- st.markdown(
184
- """
185
- 1. Choose **Web** or **Document** research.
186
- 2. Enter your **query** and pick **report type**.
187
- 3. Provide URLs **or** upload files (for document research).
188
- 4. Click **Run Research** — logs stream live; final report + PDF download appear at the end.
189
- """
190
- )
191
-
192
  research_type = st.sidebar.selectbox(
193
  "Select research type:",
194
  ["Web Research", "Document Research"],
195
- help="Choose between web-based research or research from local documents.",
196
  )
197
  report_type = st.sidebar.selectbox(
198
  "Select report type:",
@@ -200,22 +134,7 @@ report_type = st.sidebar.selectbox(
200
  help="Choose the format of the final report.",
201
  )
202
 
203
- # Model choices (ensure we never hit `o1-preview`)
204
- with st.sidebar.expander("Model Settings", expanded=False):
205
- strategic_choice = st.selectbox(
206
- "Strategic model",
207
- ["gpt-4o", "gpt-4o-mini"],
208
- index=0,
209
- help="Planning/analysis model used by the agent.",
210
- )
211
- smart_choice = st.selectbox(
212
- "Smart model",
213
- ["gpt-4o-mini", "gpt-4o"],
214
- index=0,
215
- help="Cheaper/faster model used by the agent.",
216
- )
217
-
218
- # Source inputs
219
  sources = []
220
  if research_type == "Web Research":
221
  sources_input = st.sidebar.text_area(
@@ -228,43 +147,33 @@ else:
228
  uploaded_files = st.sidebar.file_uploader(
229
  "Upload files for local research:",
230
  accept_multiple_files=True,
231
- help="Upload documents to analyze.",
232
  )
233
  if uploaded_files:
234
- os.makedirs("uploads", exist_ok=True)
235
  for up in uploaded_files:
236
- fp = os.path.join("uploads", up.name)
237
  with open(fp, "wb") as f:
238
  f.write(up.getbuffer())
239
 
240
- run_clicked = st.sidebar.button("Run Research", type="primary")
241
-
242
- # Warn if API keys are missing
243
  if not os.getenv("OPENAI_API_KEY") or not os.getenv("TAVILY_API_KEY"):
244
  st.error("OPENAI_API_KEY or TAVILY_API_KEY is not set in environment variables.")
245
 
246
- # -------------------------
247
- # Run the agent (with live logs)
248
- # -------------------------
249
  if run_clicked:
250
  if not user_query:
251
  st.warning("Please enter a research query.")
252
  else:
253
- # Retriever back-end (Tavily)
254
  os.environ["RETRIEVER"] = "tavily"
255
 
256
- # Apply model selections so gpt_researcher gets "<provider>:<model>"
257
- _apply_model_env(strategic_choice, smart_choice)
258
-
259
- # Decide the report source
260
  report_source = "local" if research_type == "Document Research" else "web"
261
 
262
- # Live logs area
263
- st.subheader("Agent Logs (live)")
264
  live_logs_placeholder = st.empty()
265
 
266
  with st.spinner("Running research…"):
267
- # Stream logs while running
268
  report_text, final_logs = asyncio.run(
269
  run_research_streaming(
270
  query=final_query,
@@ -275,18 +184,15 @@ if run_clicked:
275
  )
276
  )
277
 
278
- # Persist results
279
  st.session_state["report"] = report_text
280
  st.session_state["logs"] = final_logs
281
 
282
- # -------------------------
283
- # Show results (if any)
284
- # -------------------------
285
  if "report" in st.session_state:
286
  st.markdown("### Research Report")
287
  st.markdown(st.session_state["report"])
288
 
289
- # Create & offer PDF download
290
  try:
291
  pdf_path = create_pdf(st.session_state["report"])
292
  with open(pdf_path, "rb") as pdf_file:
@@ -299,16 +205,15 @@ if "report" in st.session_state:
299
  except Exception as e:
300
  st.warning(f"Could not generate PDF: {e}")
301
 
302
- # Final logs snapshot (separate from the live stream above)
303
- st.markdown("### Agent Logs")
304
  st.text_area(
305
- "Logs will appear here during/after the research process:",
306
- value=_clean_logs(st.session_state.get("logs", "")),
307
  height=220,
308
- key=f"logs_{uuid.uuid4()}",
309
  )
310
 
311
- # Hide default Streamlit footer & menu
312
  st.markdown(
313
  """
314
  <style>
 
1
# --- set a writable doc path BEFORE importing gpt_researcher ---
# NOTE(review): the rest of this module references the plain name `os`
# (os.makedirs, os.environ, os.getenv, os.path.join); importing only
# `os as _os` would leave `os` undefined and crash at runtime, so
# import it under its normal name.
import os

# gpt_researcher reads DOC_PATH at import/construction time for local
# document research, so it must be set (and exist) before the import below.
os.environ.setdefault("DOC_PATH", "/app/uploads")  # or "/tmp/my-docs"
os.makedirs(os.environ["DOC_PATH"], exist_ok=True)

# ---------------------------------------------------------------
7
  import io
8
  import uuid
9
  import asyncio
 
15
  from fpdf import FPDF
16
  from gpt_researcher import GPTResearcher
17
 
18
+ # Streamlit page config
 
 
 
19
  st.set_page_config(layout="wide", page_title="GPT Researcher")
20
 
21
+ # Allow asyncio.run in Streamlit
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  nest_asyncio.apply()
23
 
24
+ # -------- PDF helper --------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  class PDF(FPDF):
26
  def header(self):
27
  self.set_font("Arial", "B", 12)
 
33
  self.cell(0, 10, f"Page {self.page_no()}", 0, 0, "C")
34
 
35
def create_pdf(report_text: str) -> str:
    """Render *report_text* as a PDF under /tmp and return the file path.

    A fresh UUID is folded into the filename so concurrent sessions never
    collide on the same output file.
    """
    out_path = f"/tmp/research_report_{uuid.uuid4().hex}.pdf"
    doc = PDF()
    doc.add_page()
    doc.set_auto_page_break(auto=True, margin=15)
    doc.set_font("Arial", size=12)
    # FPDF core fonts only understand latin-1, so unsupported characters
    # are replaced rather than raising.
    for text_line in report_text.split("\n"):
        safe_line = text_line.encode("latin-1", "replace").decode("latin-1")
        doc.multi_cell(0, 10, safe_line)
    doc.output(out_path, "F")
    return out_path
47
 
48
# -------- live research runner --------
async def run_research_streaming(query: str, report_type: str, report_source: str, sources: list, logs_placeholder):
    """
    Execute a GPT Researcher run while mirroring captured stdout into
    *logs_placeholder*, refreshing roughly twice per second.
    Returns (report_text, final_logs).
    """
    stdout_buffer = io.StringIO()

    with redirect_stdout(stdout_buffer):
        # Build the researcher for local-document or web research.
        if report_source == "local":
            # ensure DOC_PATH exists (already set before import, but keep it safe)
            os.makedirs(os.environ["DOC_PATH"], exist_ok=True)
            researcher = GPTResearcher(query=query, report_type=report_type, report_source="local")
        else:
            researcher = GPTResearcher(query=query, report_type=report_type, source_urls=sources)

        # Run research as a background task so the log view can refresh
        # while it is in flight.
        research_task = asyncio.create_task(researcher.conduct_research())

        while not research_task.done():
            await asyncio.sleep(0.5)
            captured = stdout_buffer.getvalue() or "Starting…"
            logs_placeholder.text_area(
                "Agent Logs (live)",
                value=captured,
                height=220,
                key=f"live_logs_{uuid.uuid4()}",
            )

        # Re-await so any exception raised inside the task surfaces here.
        await research_task

        # One last refresh once conduct_research has finished.
        logs_placeholder.text_area(
            "Agent Logs (live)",
            value=stdout_buffer.getvalue() or "Finalizing…",
            height=220,
            key=f"live_logs_final_{uuid.uuid4()}",
        )

        # Write the report
        report_text = await researcher.write_report()

    final_logs = stdout_buffer.getvalue()
    return report_text, final_logs
94
 
95
+ # ---------------- UI ----------------
 
 
 
96
  st.title("GPT Researcher")
97
  st.markdown(
98
  """
99
+ GPT Researcher is an autonomous agent for web/doc research that produces a detailed, factual report.
 
100
  """
101
  )
102
 
103
  with st.expander("Why Use GPT Researcher?", expanded=False):
104
  st.markdown(
105
  """
106
+ - **Objective & Factual**
107
+ - **Time-Efficient**
108
+ - **Up-to-Date** (web or uploaded docs)
109
+ - **Long-Form Reports** (2,000+ words possible)
110
  """
111
  )
112
 
113
+ # Input label with accessibility (hide visually but not empty)
114
+ user_query = st.text_input(
115
+ "Research query",
116
+ "Why is the Stock Price of Nvidia Soaring?",
117
+ help="Type your research question or topic.",
118
+ label_visibility="collapsed",
 
 
 
119
  )
120
 
 
 
 
 
121
  current_date = datetime.now().strftime("%B %Y")
122
  final_query = f"{user_query} Current Date is {current_date}" if user_query else ""
123
 
124
  st.sidebar.title("Research Settings")
125
 
 
 
 
 
 
 
 
 
 
 
126
  research_type = st.sidebar.selectbox(
127
  "Select research type:",
128
  ["Web Research", "Document Research"],
129
+ help="Choose web-based research or research from local documents.",
130
  )
131
  report_type = st.sidebar.selectbox(
132
  "Select report type:",
 
134
  help="Choose the format of the final report.",
135
  )
136
 
137
+ # Sources / uploads
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  sources = []
139
  if research_type == "Web Research":
140
  sources_input = st.sidebar.text_area(
 
147
  uploaded_files = st.sidebar.file_uploader(
148
  "Upload files for local research:",
149
  accept_multiple_files=True,
150
+ help=f"Files are saved to {os.environ['DOC_PATH']}",
151
  )
152
  if uploaded_files:
 
153
  for up in uploaded_files:
154
+ fp = os.path.join(os.environ["DOC_PATH"], up.name)
155
  with open(fp, "wb") as f:
156
  f.write(up.getbuffer())
157
 
158
+ # Keys check (optional UI hint)
 
 
159
  if not os.getenv("OPENAI_API_KEY") or not os.getenv("TAVILY_API_KEY"):
160
  st.error("OPENAI_API_KEY or TAVILY_API_KEY is not set in environment variables.")
161
 
162
+ run_clicked = st.sidebar.button("Run Research", type="primary")
163
+
 
164
  if run_clicked:
165
  if not user_query:
166
  st.warning("Please enter a research query.")
167
  else:
168
+ # Use Tavily retriever (what the original app did)
169
  os.environ["RETRIEVER"] = "tavily"
170
 
 
 
 
 
171
  report_source = "local" if research_type == "Document Research" else "web"
172
 
173
+ st.subheader("Agent Logs")
 
174
  live_logs_placeholder = st.empty()
175
 
176
  with st.spinner("Running research…"):
 
177
  report_text, final_logs = asyncio.run(
178
  run_research_streaming(
179
  query=final_query,
 
184
  )
185
  )
186
 
 
187
  st.session_state["report"] = report_text
188
  st.session_state["logs"] = final_logs
189
 
190
+ # ------------- Results -------------
 
 
191
  if "report" in st.session_state:
192
  st.markdown("### Research Report")
193
  st.markdown(st.session_state["report"])
194
 
195
+ # Create & offer PDF
196
  try:
197
  pdf_path = create_pdf(st.session_state["report"])
198
  with open(pdf_path, "rb") as pdf_file:
 
205
  except Exception as e:
206
  st.warning(f"Could not generate PDF: {e}")
207
 
208
+ st.markdown("### Agent Logs (final)")
 
209
  st.text_area(
210
+ "Logs snapshot after run:",
211
+ value=st.session_state.get("logs", ""),
212
  height=220,
213
+ key=f"logs_snapshot_{uuid.uuid4()}",
214
  )
215
 
216
+ # Hide Streamlit footer & menu
217
  st.markdown(
218
  """
219
  <style>