QuantumLearner committed on
Commit
e252b93
·
verified ·
1 Parent(s): a89b0c7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +96 -76
app.py CHANGED
@@ -6,48 +6,42 @@ import nest_asyncio
6
  import importlib.metadata
7
  import tempfile
8
  from datetime import datetime
9
- from contextlib import redirect_stdout
10
 
11
  import streamlit as st
12
  from fpdf import FPDF
13
  from gpt_researcher import GPTResearcher
14
 
15
- # ---------- sensible defaults to avoid KeyError in gpt_researcher ----------
16
  os.environ.setdefault("LLM_PROVIDER", "openai")
17
  os.environ.setdefault("EMBEDDING_PROVIDER", "openai")
18
  os.environ.setdefault("EMBEDDING_MODEL", "text-embedding-3-small")
19
- # you'll set RETRIEVER later to 'tavily'
20
 
21
- # ---------- version printing (optional) ----------
22
- def get_version(package_name, module=None):
23
  try:
24
- if module and hasattr(module, '__version__'):
25
- version = module.__version__
26
- else:
27
- version = importlib.metadata.version(package_name)
28
- print(f"{package_name} version: {version}")
29
  except Exception:
30
  pass
31
 
32
- get_version('streamlit', st)
33
- get_version('gpt_researcher')
34
- get_version('nest_asyncio', nest_asyncio)
35
- get_version('fpdf')
36
 
37
- print("\nStandard Library Modules:")
38
- for lib in ['os','asyncio','contextlib','io','datetime','uuid','tempfile']:
39
- print(f"{lib} is part of the Python Standard Library.")
40
-
41
- # ---------- streamlit setup ----------
42
  st.set_page_config(layout="wide")
43
  nest_asyncio.apply()
44
 
 
45
  openai_api_key = os.getenv("OPENAI_API_KEY")
46
  tavily_api_key = os.getenv("TAVILY_API_KEY")
47
  if not openai_api_key or not tavily_api_key:
48
  st.error("API keys for OpenAI or Tavily are not set in the environment variables.")
49
 
50
- # ---------- PDF helpers (in-memory, no filesystem writes) ----------
51
  class PDF(FPDF):
52
  def header(self):
53
  self.set_font("Arial", "B", 12)
@@ -63,34 +57,51 @@ def create_pdf_bytes(report_text: str) -> bytes:
63
  pdf.add_page()
64
  pdf.set_auto_page_break(auto=True, margin=15)
65
  pdf.set_font("Arial", size=12)
66
- for line in report_text.split('\n'):
67
- # keep compatibility with latin-1 fonts
68
- pdf.multi_cell(0, 10, line.encode('latin-1', 'replace').decode('latin-1'))
69
- # dest='S' returns a latin-1 str; encode to bytes for download
70
- return pdf.output(dest='S').encode('latin-1')
71
-
72
- # ---------- async research ----------
73
- async def get_report(query: str, report_type: str, sources: list, report_source: str, doc_dir: str):
74
- f = io.StringIO()
75
- with redirect_stdout(f):
76
- if report_source == 'local':
77
- os.environ['DOC_PATH'] = doc_dir # ensure gpt_researcher looks in /tmp/uploads
78
- researcher = GPTResearcher(query=query, report_type=report_type, report_source='local')
79
- else:
80
- researcher = GPTResearcher(query=query, report_type=report_type, source_urls=sources)
81
-
82
- await researcher.conduct_research()
83
-
84
- # simple loop to let logs flush
85
- for _ in range(30):
86
- logs = f.getvalue()
87
- if "Finalized research step" in logs:
88
- break
 
 
 
 
 
 
89
  await asyncio.sleep(1)
90
 
 
 
 
 
 
 
 
 
 
 
 
91
  report = await researcher.write_report()
92
 
93
- return report, f.getvalue()
94
 
95
  # ---------- UI ----------
96
  st.title("GPT Researcher")
@@ -103,7 +114,7 @@ with st.expander("Why Use GPT Researcher?", expanded=False):
103
  - **Objective and Unbiased**: Delivers accurate, factual information.
104
  - **Time-Efficient**: Reduces manual research time.
105
  - **Up-to-Date**: Minimizes outdated info and hallucinations.
106
- - **Comprehensive**: Can produce long, detailed reports (2,000+ words).
107
  - **Reduced Misinformation**: Considers multiple sources.
108
  """)
109
 
@@ -116,16 +127,11 @@ st.markdown(
116
  """,
117
  unsafe_allow_html=True,
118
  )
119
-
120
  st.markdown('<p class="big-green-font">Enter your research query:</p>', unsafe_allow_html=True)
 
121
  default_query = "Why is the Stock Price of Nvidia Soaring?"
122
  user_query = st.text_input("", default_query, help="Type your research question or topic.")
123
-
124
- if user_query:
125
- current_date = datetime.now().strftime("%B %Y")
126
- final_query = f"{user_query} Current Date is {current_date}"
127
- else:
128
- final_query = None
129
 
130
  st.sidebar.title("Research Settings")
131
  with st.sidebar.expander("How to Use", expanded=False):
@@ -134,65 +140,79 @@ with st.sidebar.expander("How to Use", expanded=False):
134
  2. **Enter Research Query**.
135
  3. **Choose Report Type**.
136
  4. **Provide Sources or Upload Files**.
137
- 5. **Run Research** and download the PDF.
138
  """)
139
 
140
  research_type = st.sidebar.selectbox("Select research type:", ["Web Research", "Document Research"])
141
  report_type = st.sidebar.selectbox("Select report type:", ["research_report", "resource_list", "article_outline"])
142
 
143
- # use a guaranteed-writable location
144
  UPLOAD_DIR = os.path.join(tempfile.gettempdir(), "uploads")
145
  os.makedirs(UPLOAD_DIR, exist_ok=True)
146
 
147
  if research_type == "Web Research":
148
  sources_input = st.sidebar.text_area("Enter your sources (optional, comma-separated URLs):")
149
- sources = [u.strip() for u in sources_input.split(',') if u.strip()]
150
  else:
151
  uploaded_files = st.sidebar.file_uploader("Upload files for local research:", accept_multiple_files=True)
152
  sources = []
153
  if uploaded_files:
154
- for uploaded_file in uploaded_files:
155
- with open(os.path.join(UPLOAD_DIR, uploaded_file.name), "wb") as f:
156
- f.write(uploaded_file.getbuffer())
157
 
158
  run_clicked = st.sidebar.button("Run Research")
159
 
 
 
 
 
 
 
 
 
160
  if run_clicked:
161
  if not final_query:
162
  st.warning("Please enter a research query.")
163
  else:
164
- # set retriever
165
- os.environ['RETRIEVER'] = 'tavily'
166
- report_source = 'local' if research_type == "Document Research" else 'web'
167
  with st.spinner("Running research..."):
168
- report, logs = asyncio.run(get_report(final_query, report_type, sources, report_source, UPLOAD_DIR))
 
 
 
 
 
169
  st.session_state.report = report
170
  st.session_state.logs = logs
171
 
172
- # ---------- outputs ----------
173
- if 'report' in st.session_state:
174
- st.markdown("### Research Report")
175
- st.markdown(st.session_state.report)
176
 
177
- # in-memory PDF (no filesystem writes)
178
  pdf_bytes = create_pdf_bytes(st.session_state.report)
179
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
180
- st.download_button(
181
  label="Download report as PDF",
182
  data=pdf_bytes,
183
  file_name=f"report_{timestamp}.pdf",
184
  mime="application/pdf",
 
185
  )
186
 
187
- st.markdown("### Agent Logs")
188
- st.text_area(
189
- "Logs will appear here during the research process:",
190
- value=st.session_state.get('logs', ''),
191
- height=200,
192
- key=f"logs_{uuid.uuid4()}",
193
- )
 
194
 
195
- # Hide Streamlit UI chrome
196
  st.markdown("""
197
  <style>
198
  #MainMenu {visibility: hidden;}
 
6
  import importlib.metadata
7
  import tempfile
8
  from datetime import datetime
9
+ from contextlib import redirect_stdout, redirect_stderr
10
 
11
  import streamlit as st
12
  from fpdf import FPDF
13
  from gpt_researcher import GPTResearcher
14
 
15
+ # ---------- sensible defaults for gpt_researcher env ----------
16
  os.environ.setdefault("LLM_PROVIDER", "openai")
17
  os.environ.setdefault("EMBEDDING_PROVIDER", "openai")
18
  os.environ.setdefault("EMBEDDING_MODEL", "text-embedding-3-small")
 
19
 
20
+ # ---------- tiny version printer (optional) ----------
21
+ def get_version(pkg, module=None):
22
  try:
23
+ v = getattr(module, "__version__", None) if module else None
24
+ v = v or importlib.metadata.version(pkg)
25
+ print(f"{pkg} version: {v}")
 
 
26
  except Exception:
27
  pass
28
 
29
+ get_version("streamlit", st)
30
+ get_version("gpt_researcher")
31
+ get_version("nest_asyncio", nest_asyncio)
32
+ get_version("fpdf")
33
 
34
+ # ---------- streamlit base ----------
 
 
 
 
35
  st.set_page_config(layout="wide")
36
  nest_asyncio.apply()
37
 
38
+ # API keys
39
  openai_api_key = os.getenv("OPENAI_API_KEY")
40
  tavily_api_key = os.getenv("TAVILY_API_KEY")
41
  if not openai_api_key or not tavily_api_key:
42
  st.error("API keys for OpenAI or Tavily are not set in the environment variables.")
43
 
44
+ # ---------- PDF helpers (in-memory only) ----------
45
  class PDF(FPDF):
46
  def header(self):
47
  self.set_font("Arial", "B", 12)
 
57
  pdf.add_page()
58
  pdf.set_auto_page_break(auto=True, margin=15)
59
  pdf.set_font("Arial", size=12)
60
+ for line in report_text.split("\n"):
61
+ pdf.multi_cell(0, 10, line.encode("latin-1", "replace").decode("latin-1"))
62
+ # dest='S' returns str; encode for bytes
63
+ return pdf.output(dest="S").encode("latin-1")
64
+
65
+ # ---------- live research with streaming logs ----------
66
+ async def run_research_streaming(query: str, report_type: str, sources: list, report_source: str, doc_dir: str, logs_box):
67
+ buf = io.StringIO()
68
+
69
+ # Build researcher
70
+ if report_source == "local":
71
+ os.environ["DOC_PATH"] = doc_dir
72
+ researcher = GPTResearcher(query=query, report_type=report_type, report_source="local")
73
+ else:
74
+ researcher = GPTResearcher(query=query, report_type=report_type, source_urls=sources)
75
+
76
+ # Run and stream logs every second while the task is pending
77
+ with redirect_stdout(buf), redirect_stderr(buf):
78
+ task = asyncio.create_task(researcher.conduct_research())
79
+
80
+ while not task.done():
81
+ logs = buf.getvalue()
82
+ # update the single stable widget for logs
83
+ logs_box.text_area(
84
+ "Agent Logs (live)",
85
+ value=logs if logs else "Starting…",
86
+ height=240,
87
+ key="live_logs_box",
88
+ )
89
  await asyncio.sleep(1)
90
 
91
+ # ensure any final prints are captured
92
+ await task
93
+ final_logs = buf.getvalue()
94
+ logs_box.text_area(
95
+ "Agent Logs (live)",
96
+ value=final_logs,
97
+ height=240,
98
+ key="live_logs_box",
99
+ )
100
+
101
+ # Now write the report
102
  report = await researcher.write_report()
103
 
104
+ return report, final_logs
105
 
106
  # ---------- UI ----------
107
  st.title("GPT Researcher")
 
114
  - **Objective and Unbiased**: Delivers accurate, factual information.
115
  - **Time-Efficient**: Reduces manual research time.
116
  - **Up-to-Date**: Minimizes outdated info and hallucinations.
117
+ - **Comprehensive**: Produces long, detailed reports (2,000+ words).
118
  - **Reduced Misinformation**: Considers multiple sources.
119
  """)
120
 
 
127
  """,
128
  unsafe_allow_html=True,
129
  )
 
130
  st.markdown('<p class="big-green-font">Enter your research query:</p>', unsafe_allow_html=True)
131
+
132
  default_query = "Why is the Stock Price of Nvidia Soaring?"
133
  user_query = st.text_input("", default_query, help="Type your research question or topic.")
134
+ final_query = f"{user_query} Current Date is {datetime.now().strftime('%B %Y')}" if user_query else None
 
 
 
 
 
135
 
136
  st.sidebar.title("Research Settings")
137
  with st.sidebar.expander("How to Use", expanded=False):
 
140
  2. **Enter Research Query**.
141
  3. **Choose Report Type**.
142
  4. **Provide Sources or Upload Files**.
143
+ 5. **Run Research** — watch live logs and download the PDF.
144
  """)
145
 
146
  research_type = st.sidebar.selectbox("Select research type:", ["Web Research", "Document Research"])
147
  report_type = st.sidebar.selectbox("Select report type:", ["research_report", "resource_list", "article_outline"])
148
 
149
+ # always-writable uploads dir
150
  UPLOAD_DIR = os.path.join(tempfile.gettempdir(), "uploads")
151
  os.makedirs(UPLOAD_DIR, exist_ok=True)
152
 
153
  if research_type == "Web Research":
154
  sources_input = st.sidebar.text_area("Enter your sources (optional, comma-separated URLs):")
155
+ sources = [u.strip() for u in sources_input.split(",") if u.strip()]
156
  else:
157
  uploaded_files = st.sidebar.file_uploader("Upload files for local research:", accept_multiple_files=True)
158
  sources = []
159
  if uploaded_files:
160
+ for f in uploaded_files:
161
+ with open(os.path.join(UPLOAD_DIR, f.name), "wb") as out:
162
+ out.write(f.getbuffer())
163
 
164
  run_clicked = st.sidebar.button("Run Research")
165
 
166
+ # live logs placeholder (single stable widget)
167
+ st.markdown("### Agent Logs")
168
+ logs_placeholder = st.empty()
169
+
170
+ # output placeholders
171
+ report_placeholder = st.empty()
172
+ download_placeholder = st.empty()
173
+
174
  if run_clicked:
175
  if not final_query:
176
  st.warning("Please enter a research query.")
177
  else:
178
+ os.environ["RETRIEVER"] = "tavily"
179
+ src = "local" if research_type == "Document Research" else "web"
180
+
181
  with st.spinner("Running research..."):
182
+ report, logs = asyncio.run(
183
+ run_research_streaming(
184
+ final_query, report_type, sources, src, UPLOAD_DIR, logs_placeholder
185
+ )
186
+ )
187
+ # Save to session_state for persistence across reruns
188
  st.session_state.report = report
189
  st.session_state.logs = logs
190
 
191
+ # Display results if we have them (e.g., after rerun)
192
+ if "report" in st.session_state:
193
+ report_placeholder.markdown("### Research Report")
194
+ report_placeholder.markdown(st.session_state.report)
195
 
 
196
  pdf_bytes = create_pdf_bytes(st.session_state.report)
197
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
198
+ download_placeholder.download_button(
199
  label="Download report as PDF",
200
  data=pdf_bytes,
201
  file_name=f"report_{timestamp}.pdf",
202
  mime="application/pdf",
203
+ key="dl_pdf_btn",
204
  )
205
 
206
+ # If logs exist from a previous run, keep them visible
207
+ if "logs" in st.session_state:
208
+ logs_placeholder.text_area(
209
+ "Agent Logs (live)",
210
+ value=st.session_state.logs,
211
+ height=240,
212
+ key="live_logs_box",
213
+ )
214
 
215
+ # Hide Streamlit chrome
216
  st.markdown("""
217
  <style>
218
  #MainMenu {visibility: hidden;}