QuantumLearner committed on
Commit
2c6c229
·
verified ·
1 Parent(s): c443f96

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +162 -168
app.py CHANGED
@@ -1,27 +1,43 @@
1
- # --- set a writable doc path BEFORE importing gpt_researcher ---
2
- import os as _os
3
- _os.environ.setdefault("DOC_PATH", "/app/uploads") # or "/tmp/my-docs"
4
- _os.makedirs(_os.environ["DOC_PATH"], exist_ok=True)
5
-
6
- # ---------------------------------------------------------------
7
- import io
8
- import uuid
9
- import asyncio
10
- from datetime import datetime
11
- from contextlib import redirect_stdout
12
-
13
  import streamlit as st
 
 
 
14
  import nest_asyncio
 
 
15
  from fpdf import FPDF
16
- from gpt_researcher import GPTResearcher
17
-
18
- # Streamlit page config
19
- st.set_page_config(layout="wide", page_title="GPT Researcher")
20
 
21
- # Allow asyncio.run in Streamlit
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  nest_asyncio.apply()
23
 
24
- # -------- PDF helper --------
25
  class PDF(FPDF):
26
  def header(self):
27
  self.set_font("Arial", "B", 12)
@@ -33,193 +49,171 @@ class PDF(FPDF):
33
  self.cell(0, 10, f"Page {self.page_no()}", 0, 0, "C")
34
 
35
  def create_pdf(report_text: str) -> str:
36
- """Write PDF to a unique temp path and return the path."""
37
- pdf_path = f"/tmp/research_report_{uuid.uuid4().hex}.pdf"
 
38
  pdf = PDF()
39
  pdf.add_page()
40
  pdf.set_auto_page_break(auto=True, margin=15)
41
  pdf.set_font("Arial", size=12)
42
- for line in report_text.split("\n"):
43
- # FPDF is latin-1: degrade gracefully
44
- pdf.multi_cell(0, 10, line.encode("latin-1", "replace").decode("latin-1"))
45
- pdf.output(pdf_path, "F")
46
- return pdf_path
47
 
48
- # -------- live research runner --------
49
- async def run_research_streaming(query: str, report_type: str, report_source: str, sources: list, logs_placeholder):
50
- """
51
- Run research and stream stdout to logs_placeholder.
52
- Returns (report_text, final_logs).
53
- """
54
- buf = io.StringIO()
55
 
56
- with redirect_stdout(buf):
57
- if report_source == "local":
58
- # ensure DOC_PATH exists (already set before import, but keep it safe)
59
- os.makedirs(os.environ["DOC_PATH"], exist_ok=True)
60
- researcher = GPTResearcher(query=query, report_type=report_type, report_source="local")
 
 
 
 
 
61
  else:
62
  researcher = GPTResearcher(query=query, report_type=report_type, source_urls=sources)
63
 
64
- # Start research so we can poll logs
65
- task = asyncio.create_task(researcher.conduct_research())
66
-
67
- # Stream logs while running
68
- while not task.done():
69
- await asyncio.sleep(0.5)
70
- logs = buf.getvalue() or "Starting…"
71
- logs_placeholder.text_area(
72
- "Agent Logs (live)",
73
- value=logs,
74
- height=220,
75
- key=f"live_logs_{uuid.uuid4()}",
76
- )
77
-
78
- # Propagate exceptions if any
79
- await task
80
 
81
- # Final logs refresh
82
- logs_placeholder.text_area(
83
- "Agent Logs (live)",
84
- value=buf.getvalue() or "Finalizing…",
85
- height=220,
86
- key=f"live_logs_final_{uuid.uuid4()}",
87
- )
 
 
88
 
89
- # Write the report
90
- report_text = await researcher.write_report()
91
 
92
- final_logs = buf.getvalue()
93
- return report_text, final_logs
94
 
95
- # ---------------- UI ----------------
96
  st.title("GPT Researcher")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  st.markdown(
98
  """
99
- GPT Researcher is an autonomous agent for web/doc research that produces a detailed, factual report.
100
- """
101
- )
102
-
103
- with st.expander("Why Use GPT Researcher?", expanded=False):
104
- st.markdown(
105
- """
106
- - **Objective & Factual**
107
- - **Time-Efficient**
108
- - **Up-to-Date** (web or uploaded docs)
109
- - **Long-Form Reports** (2,000+ words possible)
110
- """
111
- )
112
-
113
- # Input label with accessibility (hide visually but not empty)
114
- user_query = st.text_input(
115
- "Research query",
116
- "Why is the Stock Price of Nvidia Soaring?",
117
- help="Type your research question or topic.",
118
- label_visibility="collapsed",
119
  )
120
 
121
- current_date = datetime.now().strftime("%B %Y")
122
- final_query = f"{user_query} Current Date is {current_date}" if user_query else ""
123
 
124
- st.sidebar.title("Research Settings")
125
 
126
- research_type = st.sidebar.selectbox(
127
- "Select research type:",
128
- ["Web Research", "Document Research"],
129
- help="Choose web-based research or research from local documents.",
130
- )
131
- report_type = st.sidebar.selectbox(
132
- "Select report type:",
133
- ["research_report", "resource_list", "article_outline"],
134
- help="Choose the format of the final report.",
135
- )
136
 
137
- # Sources / uploads
138
- sources = []
139
- if research_type == "Web Research":
140
- sources_input = st.sidebar.text_area(
141
- "Enter your sources (optional, comma-separated URLs):",
142
- help="Provide a list of URLs separated by commas.",
143
- )
144
- if sources_input:
145
- sources = [u.strip() for u in sources_input.split(",") if u.strip()]
146
  else:
147
- uploaded_files = st.sidebar.file_uploader(
148
- "Upload files for local research:",
149
- accept_multiple_files=True,
150
- help=f"Files are saved to {os.environ['DOC_PATH']}",
151
- )
152
- if uploaded_files:
153
- for up in uploaded_files:
154
- fp = os.path.join(os.environ["DOC_PATH"], up.name)
155
- with open(fp, "wb") as f:
156
- f.write(up.getbuffer())
157
-
158
- # Keys check (optional UI hint)
159
- if not os.getenv("OPENAI_API_KEY") or not os.getenv("TAVILY_API_KEY"):
160
- st.error("OPENAI_API_KEY or TAVILY_API_KEY is not set in environment variables.")
161
 
162
- run_clicked = st.sidebar.button("Run Research", type="primary")
163
 
164
- if run_clicked:
165
- if not user_query:
166
- st.warning("Please enter a research query.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
  else:
168
- # Use Tavily retriever (what the original app did)
169
- os.environ["RETRIEVER"] = "tavily"
170
-
171
- report_source = "local" if research_type == "Document Research" else "web"
172
-
173
- st.subheader("Agent Logs")
174
- live_logs_placeholder = st.empty()
175
-
176
- with st.spinner("Running research…"):
177
- report_text, final_logs = asyncio.run(
178
- run_research_streaming(
179
- query=final_query,
180
- report_type=report_type,
181
- report_source=report_source,
182
- sources=sources,
183
- logs_placeholder=live_logs_placeholder,
184
- )
185
- )
186
 
187
- st.session_state["report"] = report_text
188
- st.session_state["logs"] = final_logs
 
 
189
 
190
- # ------------- Results -------------
191
- if "report" in st.session_state:
192
  st.markdown("### Research Report")
193
- st.markdown(st.session_state["report"])
194
 
195
- # Create & offer PDF
196
  try:
197
- pdf_path = create_pdf(st.session_state["report"])
198
  with open(pdf_path, "rb") as pdf_file:
199
  st.download_button(
200
  label="Download report as PDF",
201
  data=pdf_file,
202
  file_name="report.pdf",
203
- mime="application/pdf",
204
  )
205
  except Exception as e:
206
  st.warning(f"Could not generate PDF: {e}")
207
 
208
- st.markdown("### Agent Logs (final)")
209
- st.text_area(
210
- "Logs snapshot after run:",
211
- value=st.session_state.get("logs", ""),
212
- height=220,
213
- key=f"logs_snapshot_{uuid.uuid4()}",
214
- )
 
 
 
 
 
 
 
215
 
216
- # Hide Streamlit footer & menu
217
- st.markdown(
218
- """
219
- <style>
220
- #MainMenu {visibility: hidden;}
221
- footer {visibility: hidden;}
222
- </style>
223
- """,
224
- unsafe_allow_html=True,
225
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ import os
3
+ from gpt_researcher import GPTResearcher
4
+ import asyncio
5
  import nest_asyncio
6
+ from contextlib import redirect_stdout
7
+ import io
8
  from fpdf import FPDF
9
+ from datetime import datetime
10
+ import uuid
11
+ import importlib.metadata
 
12
 
13
+ # ---------- version helpers (as in your original) ----------
14
def get_version(package_name, module=None):
    """Print and return the version of ``package_name``.

    Args:
        package_name: Distribution name used for the metadata lookup and in
            the printed messages.
        module: Optional already-imported module. When it is truthy and has
            a ``__version__`` attribute, that value is used and the
            metadata lookup is skipped.

    Returns:
        The version that was printed (``module.__version__`` or the string
        from ``importlib.metadata.version``), or ``None`` when the version
        could not be determined.
    """
    try:
        if module and hasattr(module, '__version__'):
            version = module.__version__
        else:
            version = importlib.metadata.version(package_name)
    except AttributeError:
        # Defensive only: the hasattr() guard above normally prevents this.
        print(f"{package_name} does not have a __version__ attribute.")
        return None
    except importlib.metadata.PackageNotFoundError:
        print(f"{package_name} is not installed.")
        return None

    print(f"{package_name} version: {version}")
    return version
25
+
26
+ import asyncio as _asyncio
27
# Log the version of each third-party dependency on every script run.
# Where a module object is supplied, get_version() prefers its
# ``__version__`` attribute; otherwise it looks the distribution up by name.
for _pkg, _mod in (
    ('streamlit', st),
    ('gpt_researcher', None),
    ('nest_asyncio', nest_asyncio),
    ('fpdf', None),
):
    get_version(_pkg, _mod)

print("\nStandard Library Modules:")
for lib in ('os', 'asyncio', 'contextlib', 'io', 'datetime', 'uuid'):
    print(f"{lib} is part of the Python Standard Library and does not have a separate version number.")

# ---------- Streamlit / asyncio setup ----------
# Use the wide page layout.
st.set_page_config(layout="wide")
# Patch asyncio so that asyncio.run() can be used from within Streamlit.
nest_asyncio.apply()
39
 
40
+ # ---------- PDF helper (same logic, safe path) ----------
41
  class PDF(FPDF):
42
  def header(self):
43
  self.set_font("Arial", "B", 12)
 
49
  self.cell(0, 10, f"Page {self.page_no()}", 0, 0, "C")
50
 
51
def create_pdf(report_text: str) -> str:
    """Render ``report_text`` into a PDF file and return the file's path.

    The file is written under the system temporary directory
    (``tempfile.gettempdir()``, which honours ``TMPDIR`` and falls back to
    ``/tmp`` on typical Linux containers) with a UUID-based name, so
    separate calls never overwrite each other's output.

    Args:
        report_text: Report body. It is split on newlines and each line is
            written with its own ``multi_cell`` call.

    Returns:
        Path of the PDF that was written. The file is not removed here;
        cleaning it up is left to the caller.
    """
    import tempfile  # local import keeps this block self-contained

    pdf_path = os.path.join(tempfile.gettempdir(), f"report_{uuid.uuid4().hex}.pdf")

    pdf = PDF()
    pdf.add_page()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.set_font("Arial", size=12)

    for line in report_text.split('\n'):
        # FPDF is latin-1; degrade gracefully by replacing characters that
        # cannot be encoded instead of raising.
        pdf.multi_cell(0, 10, line.encode('latin-1', 'replace').decode('latin-1'))

    pdf.output(pdf_path, 'F')
    return pdf_path
 
66
 
67
+ # ---------- async run (unchanged behavior: capture stdout, return at end) ----------
68
async def get_report(query: str, report_type: str, sources: list, report_source: str):
    """Run one GPT Researcher job and return ``(report, captured_stdout)``.

    Everything written to stdout while the researcher is constructed,
    conducts its research and writes the report is captured in an in-memory
    buffer and returned once the run has finished.

    Args:
        query: Research question handed to ``GPTResearcher``.
        report_type: Passed through as ``report_type``.
        sources: URLs passed as ``source_urls`` for non-local runs; not used
            when ``report_source`` is ``'local'``.
        report_source: ``'local'`` selects research over the files in
            ``/tmp/uploads``; any other value takes the URL/web branch.

    Returns:
        A tuple of the value returned by ``researcher.write_report()`` and
        the captured stdout text.
    """
    captured = io.StringIO()

    with redirect_stdout(captured):
        if report_source == 'local':
            # Use a guaranteed-writable dir in containers.
            os.environ['DOC_PATH'] = '/tmp/uploads'
            os.makedirs(os.environ['DOC_PATH'], exist_ok=True)
            researcher = GPTResearcher(query=query, report_type=report_type, report_source=report_source)
        else:
            researcher = GPTResearcher(query=query, report_type=report_type, source_urls=sources)

        # conduct_research() is awaited to completion here, so there is
        # nothing left to wait for afterwards. The former loop that polled
        # the captured log for "Finalized research step" could not see new
        # output and only delayed the report by up to 30 seconds whenever
        # that marker was not printed.
        await researcher.conduct_research()

        report = await researcher.write_report()

    return report, captured.getvalue()
 
95
 
96
+ # ---------- UI (same layout/wording as your original) ----------
97
  st.title("GPT Researcher")
98
+ st.markdown("""
99
+ GPT Researcher is an autonomous agent designed for comprehensive online research tasks.
100
+ It pulls information from the web or uploaded documents to create detailed, factual, research reports.
101
+ """)
102
+ with st.expander("Why Use GPT Researcher?", expanded=False):
103
+ st.markdown("""
104
+ - **Objective and Unbiased**: GPT Researcher focuses on delivering accurate and factual information without bias.
105
+ - **Time-Efficient**: It significantly reduces the time required for manual research tasks.
106
+ - **Up-to-Date Information**: Unlike traditional LLMs, GPT Researcher avoids outdated information and minimizes the risk of hallucinations.
107
+ - **Comprehensive Reports**: Capable of producing long, detailed research reports (2,000+ words).
108
+ - **Reduced Misinformation**: By considering a wide range of sources, it minimizes the risks associated with limited or biased information.
109
+ For more details, visit the [GPT Researcher GitHub repository](https://github.com/assafelovic/gpt-researcher).
110
+ """)
111
+
112
+ # Keep your style tweaks
113
  st.markdown(
114
  """
115
+ <style>
116
+ .big-green-font { font-size:20px !important; font-weight: bold; color: green; margin-bottom: -10px; }
117
+ .stTextInput > div > input { margin-top: -25px; }
118
+ </style>
119
+ """,
120
+ unsafe_allow_html=True,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
  )
122
 
123
+ st.markdown('<p class="big-green-font">Enter your research query:</p>', unsafe_allow_html=True)
 
124
 
125
default_query = "Why is the Stock Price of Nvidia Soaring?"

# The visible prompt is the styled "Enter your research query:" paragraph
# rendered just before this widget, so the widget's own label is collapsed.
# The label text must still be non-empty: Streamlit emits a warning for
# empty labels even when they are hidden.
user_query = st.text_input(
    "Research query",
    default_query,
    help="Type your research question or topic.",
    label_visibility="collapsed",
)

# Append the current month and year to the query; an empty input produces
# an empty query so the run handler can reject it.
if user_query:
    current_date = datetime.now().strftime("%B %Y")
    final_query = f"{user_query} Current Date is {current_date}"
else:
    final_query = ""
 
 
 
 
 
 
 
 
 
 
 
 
 
135
 
136
+ st.sidebar.title("Research Settings")
137
 
138
+ with st.sidebar.expander("How to Use", expanded=False):
139
+ st.markdown("""
140
+ ### How to Use
141
+ 1. **Select Research Type**: Choose between Web Research and Document Research.
142
+ 2. **Enter Research Query**: Type in your research question or topic.
143
+ 3. **Choose Report Type**: Select the format of the report you want (research report, resource list, or article outline).
144
+ 4. **Provide Sources or Upload Files**: For Web Research, you can enter URLs. For Document Research, upload the necessary files.
145
+ 5. **Run Research**: Click the "Run Research" button to start. The logs will update after the run, and the final report will be displayed and available for download as a PDF.
146
+ """)
147
+
148
# Sidebar controls: research mode, report format, and either source URLs
# (web research) or uploaded documents (document research).
with st.sidebar:
    research_type = st.selectbox(
        "Select research type:",
        ["Web Research", "Document Research"],
        help="Choose between web-based research or research from local documents.",
    )
    report_type = st.selectbox(
        "Select report type:",
        ["research_report", "resource_list", "article_outline"],
        help="Choose the format of the final report.",
    )

    if research_type == "Web Research":
        sources_input = st.text_area(
            "Enter your sources (optional, comma-separated URLs):",
            help="Provide a list of URLs to use as sources, separated by commas.",
        )
        # Drop blank entries so an empty text area yields an empty list.
        sources = [url.strip() for url in sources_input.split(',') if url.strip()]
    else:
        uploaded_files = st.file_uploader(
            "Upload files for local research:",
            accept_multiple_files=True,
            help="Upload documents for the research.",
        )
        sources = []
        if uploaded_files:
            # Save uploads to the writable /tmp/uploads (the directory
            # get_report() sets as DOC_PATH for local research).
            os.makedirs("/tmp/uploads", exist_ok=True)
            for uploaded_file in uploaded_files:
                # The file name is supplied by the client: keep only its
                # final path component so a name such as "../../x" cannot
                # write outside the upload directory.
                safe_name = os.path.basename(uploaded_file.name.replace("\\", "/"))
                if safe_name in ("", ".", ".."):
                    continue
                file_path = os.path.join("/tmp/uploads", safe_name)
                with open(file_path, "wb") as f:
                    f.write(uploaded_file.getbuffer())
165
+
166
+ if st.button("Run Research"):
167
+ if not user_query:
168
+ st.warning("Please enter a research query.")
169
+ else:
170
+ # Same as your original
171
+ os.environ['RETRIEVER'] = 'tavily'
172
+ report_source = 'local' if research_type == "Document Research" else 'web'
 
173
 
174
+ with st.spinner("Running research..."):
175
+ report, logs = asyncio.run(get_report(final_query, report_type, sources, report_source))
176
+ st.session_state.report = report
177
+ st.session_state.logs = logs
178
 
179
+ # ---------- Results (report + PDF + final logs, same as your original) ----------
180
+ if 'report' in st.session_state:
181
  st.markdown("### Research Report")
182
+ st.markdown(st.session_state.report)
183
 
184
+ # Create PDF safely under /tmp and offer download
185
  try:
186
+ pdf_path = create_pdf(st.session_state.report)
187
  with open(pdf_path, "rb") as pdf_file:
188
  st.download_button(
189
  label="Download report as PDF",
190
  data=pdf_file,
191
  file_name="report.pdf",
192
+ mime="application/pdf"
193
  )
194
  except Exception as e:
195
  st.warning(f"Could not generate PDF: {e}")
196
 
197
+ st.markdown("### Agent Logs")
198
+ if 'logs' in st.session_state:
199
+ st.text_area(
200
+ "Logs will appear here during the research process:",
201
+ value=st.session_state.logs,
202
+ height=200,
203
+ key=f"logs_{uuid.uuid4()}" # same trick you had originally
204
+ )
205
+ else:
206
+ st.text_area(
207
+ "Logs will appear here during the research process",
208
+ height=200,
209
+ key=f"logs_{uuid.uuid4()}"
210
+ )
211
 
212
+ # Hide Streamlit's default footer and menu
213
+ hide_streamlit_style = """
214
+ <style>
215
+ #MainMenu {visibility: hidden;}
216
+ footer {visibility: hidden;}
217
+ </style>
218
+ """
219
+ st.markdown(hide_streamlit_style, unsafe_allow_html=True)