QuantumLearner committed on
Commit
765eafe
·
verified ·
1 Parent(s): f932c00

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +223 -106
app.py CHANGED
@@ -2,187 +2,304 @@ import os
2
  import io
3
  import uuid
4
  import asyncio
5
- import nest_asyncio
6
- import importlib.metadata
7
- import tempfile
8
  from datetime import datetime
9
- from contextlib import redirect_stdout, redirect_stderr
10
 
11
  import streamlit as st
 
12
  from fpdf import FPDF
13
  from gpt_researcher import GPTResearcher
14
 
15
- # ---------- sensible defaults for gpt_researcher env ----------
 
 
 
 
 
 
16
  os.environ.setdefault("LLM_PROVIDER", "openai")
17
  os.environ.setdefault("EMBEDDING_PROVIDER", "openai")
18
  os.environ.setdefault("EMBEDDING_MODEL", "text-embedding-3-small")
 
 
 
 
 
 
 
19
 
20
- # ---------- streamlit base ----------
21
- st.set_page_config(layout="wide")
22
  nest_asyncio.apply()
23
 
24
- # API keys
25
- openai_api_key = os.getenv("OPENAI_API_KEY")
26
- tavily_api_key = os.getenv("TAVILY_API_KEY")
27
- if not openai_api_key or not tavily_api_key:
28
- st.error("API keys for OpenAI or Tavily are not set in the environment variables.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
- # ---------- PDF helpers (in-memory only) ----------
31
  class PDF(FPDF):
32
  def header(self):
33
  self.set_font("Arial", "B", 12)
34
  self.cell(0, 10, "Research Report", 0, 1, "C")
 
35
  def footer(self):
36
  self.set_y(-15)
37
  self.set_font("Arial", "I", 8)
38
  self.cell(0, 10, f"Page {self.page_no()}", 0, 0, "C")
39
 
40
- def create_pdf_bytes(report_text: str) -> bytes:
 
 
41
  pdf = PDF()
42
  pdf.add_page()
43
  pdf.set_auto_page_break(auto=True, margin=15)
44
  pdf.set_font("Arial", size=12)
 
45
  for line in report_text.split("\n"):
46
  pdf.multi_cell(0, 10, line.encode("latin-1", "replace").decode("latin-1"))
47
- return pdf.output(dest="S").encode("latin-1")
 
48
 
49
- # ---------- live research with streaming logs (single placeholder, no keys) ----------
50
- async def run_research_streaming(query: str, report_type: str, sources: list, report_source: str, doc_dir: str, logs_placeholder):
 
 
 
 
 
 
 
 
 
51
  buf = io.StringIO()
52
 
53
- # Build researcher
54
- if report_source == "local":
55
- os.environ["DOC_PATH"] = doc_dir
56
- researcher = GPTResearcher(query=query, report_type=report_type, report_source="local")
57
- else:
58
- researcher = GPTResearcher(query=query, report_type=report_type, source_urls=sources)
59
-
60
- # Create an initial visible block for logs (single widget, then we just overwrite it)
61
- logs_placeholder.code("Starting…")
62
 
63
- with redirect_stdout(buf), redirect_stderr(buf):
64
  task = asyncio.create_task(researcher.conduct_research())
65
 
 
66
  while not task.done():
67
- logs = buf.getvalue()
68
- logs_placeholder.code(logs if logs else "Starting…")
69
- await asyncio.sleep(1)
70
 
71
- # ensure final prints are shown
72
  await task
73
- final_logs = buf.getvalue()
74
- logs_placeholder.code(final_logs if final_logs else "Done.")
75
 
76
- # Now write the report
77
- report = await researcher.write_report()
 
 
 
 
 
 
78
 
79
- return report, final_logs
80
 
81
- # ---------- UI ----------
 
 
82
  st.title("GPT Researcher")
83
- st.markdown("""
84
- GPT Researcher is an autonomous agent designed for comprehensive online research tasks. It pulls information from the web or uploaded documents to create detailed, factual research reports.
85
- """)
 
 
 
86
 
87
  with st.expander("Why Use GPT Researcher?", expanded=False):
88
- st.markdown("""
89
- - **Objective & Unbiased**
90
- - **Time-Efficient**
91
- - **Up-to-Date**
92
- - **Comprehensive (2,000+ words)**
93
- - **Reduced Misinformation**
94
- """)
 
95
 
 
96
  st.markdown(
97
  """
98
  <style>
99
- .big-green-font { font-size:20px !important; font-weight:bold; color: green; margin-bottom:-10px; }
100
  .stTextInput > div > input { margin-top:-25px; }
101
  </style>
102
  """,
103
  unsafe_allow_html=True,
104
  )
105
- st.markdown('<p class="big-green-font">Enter your research query:</p>', unsafe_allow_html=True)
106
 
 
107
  default_query = "Why is the Stock Price of Nvidia Soaring?"
108
  user_query = st.text_input("", default_query, help="Type your research question or topic.")
109
- final_query = f"{user_query} Current Date is {datetime.now().strftime('%B %Y')}" if user_query else None
 
 
110
 
111
  st.sidebar.title("Research Settings")
 
112
  with st.sidebar.expander("How to Use", expanded=False):
113
- st.markdown("""
114
- 1. **Select Research Type** (Web/Document).
115
- 2. **Enter Research Query**.
116
- 3. **Choose Report Type**.
117
- 4. **Add URLs or Upload Files**.
118
- 5. **Run Research** — watch live logs, then download the PDF.
119
- """)
 
120
 
121
- research_type = st.sidebar.selectbox("Select research type:", ["Web Research", "Document Research"])
122
- report_type = st.sidebar.selectbox("Select report type:", ["research_report", "resource_list", "article_outline"])
 
 
 
 
 
 
 
 
123
 
124
- # always-writable uploads dir
125
- UPLOAD_DIR = os.path.join(tempfile.gettempdir(), "uploads")
126
- os.makedirs(UPLOAD_DIR, exist_ok=True)
 
 
 
 
 
 
 
 
 
 
 
127
 
 
 
128
  if research_type == "Web Research":
129
- sources_input = st.sidebar.text_area("Enter your sources (optional, comma-separated URLs):")
130
- sources = [u.strip() for u in sources_input.split(",") if u.strip()]
 
 
 
 
131
  else:
132
- uploaded_files = st.sidebar.file_uploader("Upload files for local research:", accept_multiple_files=True)
133
- sources = []
 
 
 
134
  if uploaded_files:
135
- for f in uploaded_files:
136
- with open(os.path.join(UPLOAD_DIR, f.name), "wb") as out:
137
- out.write(f.getbuffer())
 
 
138
 
139
- run_clicked = st.sidebar.button("Run Research")
140
 
141
- # stable placeholders
142
- st.markdown("### Agent Logs")
143
- logs_placeholder = st.empty()
144
- report_placeholder = st.empty()
145
- download_placeholder = st.empty()
146
 
 
 
 
147
  if run_clicked:
148
- if not final_query:
149
  st.warning("Please enter a research query.")
150
  else:
 
151
  os.environ["RETRIEVER"] = "tavily"
152
- src = "local" if research_type == "Document Research" else "web"
153
 
154
- with st.spinner("Running research..."):
155
- report, logs = asyncio.run(
 
 
 
 
 
 
 
 
 
 
 
156
  run_research_streaming(
157
- final_query, report_type, sources, src, UPLOAD_DIR, logs_placeholder
 
 
 
 
158
  )
159
  )
160
- # persist
161
- st.session_state.report = report
162
- st.session_state.logs = logs
163
 
164
- # Render results if available (e.g., after rerun)
 
 
 
 
 
 
165
  if "report" in st.session_state:
166
- report_placeholder.markdown("### Research Report")
167
- report_placeholder.markdown(st.session_state.report)
168
-
169
- pdf_bytes = create_pdf_bytes(st.session_state.report)
170
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
171
- download_placeholder.download_button(
172
- label="Download report as PDF",
173
- data=pdf_bytes,
174
- file_name=f"report_{timestamp}.pdf",
175
- mime="application/pdf",
176
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
177
 
178
- # Keep last logs visible after run / rerun
179
- if "logs" in st.session_state:
180
- logs_placeholder.code(st.session_state.logs)
181
-
182
- # Hide Streamlit chrome
183
- st.markdown("""
184
- <style>
185
- #MainMenu {visibility: hidden;}
186
- footer {visibility: hidden;}
187
- </style>
188
- """, unsafe_allow_html=True)
 
2
  import io
3
  import uuid
4
  import asyncio
 
 
 
5
  from datetime import datetime
6
+ from contextlib import redirect_stdout
7
 
8
  import streamlit as st
9
+ import nest_asyncio
10
  from fpdf import FPDF
11
  from gpt_researcher import GPTResearcher
12
 
13
+
14
+ # -------------------------
15
+ # Page & global configuration
16
+ # -------------------------
17
+ st.set_page_config(layout="wide", page_title="GPT Researcher")
18
+
19
+ # Providers & models — set safe defaults to avoid `o1-preview`
20
  os.environ.setdefault("LLM_PROVIDER", "openai")
21
  os.environ.setdefault("EMBEDDING_PROVIDER", "openai")
22
  os.environ.setdefault("EMBEDDING_MODEL", "text-embedding-3-small")
23
+ os.environ.setdefault("STRATEGIC_LLM", "gpt-4o")
24
+ os.environ.setdefault("SMART_LLM", "gpt-4o-mini")
25
+ # Compatibility aliases some versions of gpt_researcher read
26
+ os.environ.setdefault("STRATEGIC_MODEL", os.environ["STRATEGIC_LLM"])
27
+ os.environ.setdefault("SMART_MODEL", os.environ["SMART_LLM"])
28
+ os.environ.setdefault("STRATEGY_LLM", os.environ["STRATEGIC_LLM"])
29
+ os.environ.setdefault("STRATEGY_MODEL", os.environ["STRATEGIC_LLM"])
30
 
31
+ # Allow asyncio.run inside Streamlit
 
32
  nest_asyncio.apply()
33
 
34
+ # -------------------------
35
+ # Small helpers
36
+ # -------------------------
37
+ def _apply_model_env(strategic_model: str, smart_model: str):
38
+ """Apply model choices to environment for gpt_researcher."""
39
+ for k in ("STRATEGIC_LLM", "STRATEGIC_MODEL", "STRATEGY_LLM", "STRATEGY_MODEL"):
40
+ os.environ[k] = strategic_model
41
+ for k in ("SMART_LLM", "SMART_MODEL"):
42
+ os.environ[k] = smart_model
43
+
44
+ def _clean_logs(text: str) -> str:
45
+ """Optionally hide noisy lines about unavailable models, keep everything else."""
46
+ if not text:
47
+ return text
48
+ bad_bits = [
49
+ "The model `o1-preview` does not exist",
50
+ "`o1-preview` does not exist",
51
+ "model_not_found",
52
+ ]
53
+ lines = []
54
+ for line in text.splitlines():
55
+ if any(b in line for b in bad_bits):
56
+ continue
57
+ lines.append(line)
58
+ return "\n".join(lines)
59
 
 
60
  class PDF(FPDF):
61
  def header(self):
62
  self.set_font("Arial", "B", 12)
63
  self.cell(0, 10, "Research Report", 0, 1, "C")
64
+
65
  def footer(self):
66
  self.set_y(-15)
67
  self.set_font("Arial", "I", 8)
68
  self.cell(0, 10, f"Page {self.page_no()}", 0, 0, "C")
69
 
70
+ def create_pdf(report_text: str) -> str:
71
+ """Write PDF to a unique, writable temp path and return the path."""
72
+ pdf_path = f"/tmp/research_report_{uuid.uuid4().hex}.pdf"
73
  pdf = PDF()
74
  pdf.add_page()
75
  pdf.set_auto_page_break(auto=True, margin=15)
76
  pdf.set_font("Arial", size=12)
77
+ # FPDF is Latin-1; degrade gracefully
78
  for line in report_text.split("\n"):
79
  pdf.multi_cell(0, 10, line.encode("latin-1", "replace").decode("latin-1"))
80
+ pdf.output(pdf_path, "F")
81
+ return pdf_path
82
 
83
+ async def run_research_streaming(
84
+ query: str,
85
+ report_type: str,
86
+ report_source: str,
87
+ sources: list,
88
+ logs_placeholder
89
+ ):
90
+ """
91
+ Run research and stream stdout to the provided placeholder.
92
+ Returns (report_text, final_logs).
93
+ """
94
  buf = io.StringIO()
95
 
96
+ with redirect_stdout(buf):
97
+ # For local/doc research, set DOC_PATH and ensure it exists
98
+ if report_source == "local":
99
+ os.environ["DOC_PATH"] = "./uploads"
100
+ os.makedirs("uploads", exist_ok=True)
101
+ researcher = GPTResearcher(query=query, report_type=report_type, report_source=report_source)
102
+ else:
103
+ researcher = GPTResearcher(query=query, report_type=report_type, source_urls=sources)
 
104
 
105
+ # Kick off the task so we can poll logs while it runs
106
  task = asyncio.create_task(researcher.conduct_research())
107
 
108
+ # Stream logs while the task runs
109
  while not task.done():
110
+ await asyncio.sleep(0.5)
111
+ logs_placeholder.code(_clean_logs(buf.getvalue()) or "Starting…")
 
112
 
113
+ # Ensure exceptions are raised if any
114
  await task
 
 
115
 
116
+ # One final refresh of logs after conduct_research finishes
117
+ logs_placeholder.code(_clean_logs(buf.getvalue()) or "Finalizing…")
118
+
119
+ # Write the report
120
+ report_text = await researcher.write_report()
121
+
122
+ final_logs = buf.getvalue()
123
+ return report_text, final_logs
124
 
 
125
 
126
+ # -------------------------
127
+ # UI
128
+ # -------------------------
129
  st.title("GPT Researcher")
130
+ st.markdown(
131
+ """
132
+ GPT Researcher is an autonomous agent for comprehensive online or local-document research,
133
+ producing detailed, factual reports.
134
+ """
135
+ )
136
 
137
  with st.expander("Why Use GPT Researcher?", expanded=False):
138
+ st.markdown(
139
+ """
140
+ - **Objective & Factual**: Focused on accurate information.
141
+ - **Time-Efficient**: Automates the heavy lifting of research.
142
+ - **Up-to-Date**: Pulls from the web or your uploaded files.
143
+ - **Long-Form Reports**: Capable of 2,000+ word outputs.
144
+ """
145
+ )
146
 
147
+ # Label styling
148
  st.markdown(
149
  """
150
  <style>
151
+ .big-green-font { font-size:20px !important; font-weight:bold; color:green; margin-bottom:-10px; }
152
  .stTextInput > div > input { margin-top:-25px; }
153
  </style>
154
  """,
155
  unsafe_allow_html=True,
156
  )
 
157
 
158
+ st.markdown('<p class="big-green-font">Enter your research query:</p>', unsafe_allow_html=True)
159
  default_query = "Why is the Stock Price of Nvidia Soaring?"
160
  user_query = st.text_input("", default_query, help="Type your research question or topic.")
161
+
162
+ current_date = datetime.now().strftime("%B %Y")
163
+ final_query = f"{user_query} Current Date is {current_date}" if user_query else ""
164
 
165
  st.sidebar.title("Research Settings")
166
+
167
  with st.sidebar.expander("How to Use", expanded=False):
168
+ st.markdown(
169
+ """
170
+ 1. Choose **Web** or **Document** research.
171
+ 2. Enter your **query** and pick **report type**.
172
+ 3. Provide URLs **or** upload files (for document research).
173
+ 4. Click **Run Research** — logs stream live; final report + PDF download appear at the end.
174
+ """
175
+ )
176
 
177
+ research_type = st.sidebar.selectbox(
178
+ "Select research type:",
179
+ ["Web Research", "Document Research"],
180
+ help="Choose between web-based research or research from local documents.",
181
+ )
182
+ report_type = st.sidebar.selectbox(
183
+ "Select report type:",
184
+ ["research_report", "resource_list", "article_outline"],
185
+ help="Choose the format of the final report.",
186
+ )
187
 
188
+ # Model choices (so you never hit `o1-preview`)
189
+ with st.sidebar.expander("Model Settings", expanded=False):
190
+ strategic_choice = st.selectbox(
191
+ "Strategic model",
192
+ ["gpt-4o", "gpt-4o-mini"],
193
+ index=0,
194
+ help="Planning/analysis model used by the agent.",
195
+ )
196
+ smart_choice = st.selectbox(
197
+ "Smart model",
198
+ ["gpt-4o-mini", "gpt-4o"],
199
+ index=0,
200
+ help="Cheaper/faster model used by the agent.",
201
+ )
202
 
203
+ # Source inputs
204
+ sources = []
205
  if research_type == "Web Research":
206
+ sources_input = st.sidebar.text_area(
207
+ "Enter your sources (optional, comma-separated URLs):",
208
+ help="Provide a list of URLs separated by commas.",
209
+ )
210
+ if sources_input:
211
+ sources = [u.strip() for u in sources_input.split(",") if u.strip()]
212
  else:
213
+ uploaded_files = st.sidebar.file_uploader(
214
+ "Upload files for local research:",
215
+ accept_multiple_files=True,
216
+ help="Upload documents to analyze.",
217
+ )
218
  if uploaded_files:
219
+ os.makedirs("uploads", exist_ok=True)
220
+ for up in uploaded_files:
221
+ fp = os.path.join("uploads", up.name)
222
+ with open(fp, "wb") as f:
223
+ f.write(up.getbuffer())
224
 
225
+ run_clicked = st.sidebar.button("Run Research", type="primary")
226
 
227
+ # Warn if API keys are missing
228
+ if not os.getenv("OPENAI_API_KEY") or not os.getenv("TAVILY_API_KEY"):
229
+ st.error("OPENAI_API_KEY or TAVILY_API_KEY is not set in environment variables.")
 
 
230
 
231
+ # -------------------------
232
+ # Run the agent (with live logs)
233
+ # -------------------------
234
  if run_clicked:
235
+ if not user_query:
236
  st.warning("Please enter a research query.")
237
  else:
238
+ # Retriever back-end (Tavily)
239
  os.environ["RETRIEVER"] = "tavily"
 
240
 
241
+ # Apply model selections so gpt_researcher never tries `o1-preview`
242
+ _apply_model_env(strategic_choice, smart_choice)
243
+
244
+ # Decide the report source
245
+ report_source = "local" if research_type == "Document Research" else "web"
246
+
247
+ # Live logs area
248
+ st.subheader("Agent Logs (live)")
249
+ live_logs_placeholder = st.empty()
250
+
251
+ with st.spinner("Running research…"):
252
+ # Stream logs while running
253
+ report_text, final_logs = asyncio.run(
254
  run_research_streaming(
255
+ query=final_query,
256
+ report_type=report_type,
257
+ report_source=report_source,
258
+ sources=sources,
259
+ logs_placeholder=live_logs_placeholder,
260
  )
261
  )
 
 
 
262
 
263
+ # Persist results
264
+ st.session_state["report"] = report_text
265
+ st.session_state["logs"] = final_logs
266
+
267
+ # -------------------------
268
+ # Show results (if any)
269
+ # -------------------------
270
  if "report" in st.session_state:
271
+ st.markdown("### Research Report")
272
+ st.markdown(st.session_state["report"])
273
+
274
+ # Create & offer PDF download
275
+ try:
276
+ pdf_path = create_pdf(st.session_state["report"])
277
+ with open(pdf_path, "rb") as pdf_file:
278
+ st.download_button(
279
+ label="Download report as PDF",
280
+ data=pdf_file,
281
+ file_name="report.pdf",
282
+ mime="application/pdf",
283
+ )
284
+ except Exception as e:
285
+ st.warning(f"Could not generate PDF: {e}")
286
+
287
+ # Final logs snapshot (separate from the live stream above)
288
+ st.markdown("### Agent Logs")
289
+ st.text_area(
290
+ "Logs will appear here during/after the research process:",
291
+ value=_clean_logs(st.session_state.get("logs", "")),
292
+ height=220,
293
+ key=f"logs_{uuid.uuid4()}",
294
+ )
295
 
296
+ # Hide default Streamlit footer & menu
297
+ st.markdown(
298
+ """
299
+ <style>
300
+ #MainMenu {visibility: hidden;}
301
+ footer {visibility: hidden;}
302
+ </style>
303
+ """,
304
+ unsafe_allow_html=True,
305
+ )