QuantumLearner committed on
Commit
a89b0c7
·
verified ·
1 Parent(s): 4906b3d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +114 -148
app.py CHANGED
@@ -1,16 +1,24 @@
1
- import streamlit as st
2
  import os
3
- from gpt_researcher import GPTResearcher
 
4
  import asyncio
5
  import nest_asyncio
 
 
 
6
  from contextlib import redirect_stdout
7
- import io
 
8
  from fpdf import FPDF
9
- from datetime import datetime
10
- import uuid
11
- import importlib.metadata
 
 
 
 
12
 
13
- # For third-party libraries
14
  def get_version(package_name, module=None):
15
  try:
16
  if module and hasattr(module, '__version__'):
@@ -18,71 +26,28 @@ def get_version(package_name, module=None):
18
  else:
19
  version = importlib.metadata.version(package_name)
20
  print(f"{package_name} version: {version}")
21
- except AttributeError:
22
- print(f"{package_name} does not have a __version__ attribute.")
23
- except importlib.metadata.PackageNotFoundError:
24
- print(f"{package_name} is not installed.")
25
 
26
- # Check versions
27
  get_version('streamlit', st)
28
  get_version('gpt_researcher')
29
  get_version('nest_asyncio', nest_asyncio)
30
  get_version('fpdf')
31
 
32
- # For standard library modules
33
- standard_libs = ['os', 'asyncio', 'contextlib', 'io', 'datetime', 'uuid']
34
  print("\nStandard Library Modules:")
35
- for lib in standard_libs:
36
- print(f"{lib} is part of the Python Standard Library and does not have a separate version number.")
37
-
38
-
39
 
40
-
41
-
42
-
43
- # Apply nest_asyncio for asyncio support in Streamlit
44
  nest_asyncio.apply()
45
 
46
- # Load API keys from environment variables
47
  openai_api_key = os.getenv("OPENAI_API_KEY")
48
  tavily_api_key = os.getenv("TAVILY_API_KEY")
49
-
50
- # Check if the API keys are available
51
  if not openai_api_key or not tavily_api_key:
52
- st.error("API keys for OpenAI or Tavily are not set in the environment variables. Please set them before running the app.")
53
-
54
- # Define the asynchronous function to get the report and capture logs
55
- async def get_report(query: str, report_type: str, sources: list, report_source: str):
56
- f = io.StringIO()
57
- unique_key = str(uuid.uuid4()) # Generate a unique key for this run
58
-
59
- with redirect_stdout(f):
60
- if report_source == 'local':
61
- # Set the DOC_PATH environment variable
62
- os.environ['DOC_PATH'] = './uploads'
63
- researcher = GPTResearcher(query=query, report_type=report_type, report_source=report_source)
64
- else:
65
- researcher = GPTResearcher(query=query, report_type=report_type, source_urls=sources)
66
-
67
- await researcher.conduct_research()
68
-
69
- max_attempts = 30 # Prevent infinite loop
70
- attempts = 0
71
- while attempts < max_attempts:
72
- logs = f.getvalue()
73
-
74
- # Break condition
75
- if "Finalized research step" in logs:
76
- break
77
-
78
- await asyncio.sleep(1) # Update every second
79
- attempts += 1
80
 
81
- report = await researcher.write_report()
82
-
83
- return report, logs
84
-
85
- # Function to create PDF using fpdf with UTF-8 encoding
86
  class PDF(FPDF):
87
  def header(self):
88
  self.set_font("Arial", "B", 12)
@@ -93,143 +58,144 @@ class PDF(FPDF):
93
  self.set_font("Arial", "I", 8)
94
  self.cell(0, 10, f"Page {self.page_no()}", 0, 0, "C")
95
 
96
- def create_pdf(report_text, pdf_path):
97
  pdf = PDF()
98
  pdf.add_page()
99
  pdf.set_auto_page_break(auto=True, margin=15)
100
  pdf.set_font("Arial", size=12)
101
-
102
  for line in report_text.split('\n'):
 
103
  pdf.multi_cell(0, 10, line.encode('latin-1', 'replace').decode('latin-1'))
 
 
 
 
 
 
 
 
 
 
 
 
104
 
105
- pdf.output(pdf_path, 'F')
106
 
107
- # Streamlit interface
108
- st.set_page_config(layout="wide")
 
 
 
 
 
 
109
 
 
 
 
110
  st.title("GPT Researcher")
111
  st.markdown("""
112
  GPT Researcher is an autonomous agent designed for comprehensive online research tasks. It pulls information from the web or uploaded documents to create detailed, factual, research reports.
113
  """)
 
114
  with st.expander("Why Use GPT Researcher?", expanded=False):
115
  st.markdown("""
116
- - **Objective and Unbiased**: GPT Researcher focuses on delivering accurate and factual information without bias.
117
- - **Time-Efficient**: It significantly reduces the time required for manual research tasks.
118
- - **Up-to-Date Information**: Unlike traditional LLMs, GPT Researcher avoids outdated information and minimizes the risk of hallucinations.
119
- - **Comprehensive Reports**: Capable of producing long, detailed research reports (2,000+ words).
120
- - **Reduced Misinformation**: By considering a wide range of sources, it minimizes the risks associated with limited or biased information.
121
- For more details, visit the [GPT Researcher GitHub repository](https://github.com/assafelovic/gpt-researcher).
122
  """)
123
 
124
- # Custom CSS for styling the input label
125
  st.markdown(
126
  """
127
  <style>
128
- .big-green-font {
129
- font-size:20px !important;
130
- font-weight: bold;
131
- color: green;
132
- margin-bottom: -10px;
133
- }
134
- .stTextInput > div > input {
135
- margin-top: -25px;
136
- }
137
  </style>
138
  """,
139
  unsafe_allow_html=True,
140
  )
141
 
142
  st.markdown('<p class="big-green-font">Enter your research query:</p>', unsafe_allow_html=True)
143
-
144
- # Default query with current context
145
  default_query = "Why is the Stock Price of Nvidia Soaring?"
146
-
147
- # Display the input field for the user
148
  user_query = st.text_input("", default_query, help="Type your research question or topic.")
149
 
150
- # Process the query to include the current date after the user inputs their query
151
  if user_query:
152
  current_date = datetime.now().strftime("%B %Y")
153
  final_query = f"{user_query} Current Date is {current_date}"
 
 
154
 
155
  st.sidebar.title("Research Settings")
156
-
157
  with st.sidebar.expander("How to Use", expanded=False):
158
  st.markdown("""
159
- ### How to Use
160
- 1. **Select Research Type**: Choose between Web Research and Document Research.
161
- 2. **Enter Research Query**: Type in your research question or topic.
162
- 3. **Choose Report Type**: Select the format of the report you want (research report, resource list, or article outline).
163
- 4. **Provide Sources or Upload Files**: For Web Research, you can enter URLs. For Document Research, upload the necessary files.
164
- 5. **Run Research**: Click the "Run Research" button to start. The logs will update in real-time, and the final report will be displayed and available for download as a PDF.
165
  """)
166
 
167
- with st.sidebar:
168
- research_type = st.selectbox("Select research type:", ["Web Research", "Document Research"], help="Choose between web-based research or research from local documents.")
169
- report_type = st.selectbox("Select report type:", ["research_report", "resource_list", "article_outline"], help="Choose the format of the final report.")
170
-
171
- if research_type == "Web Research":
172
- sources_input = st.text_area("Enter your sources (optional, comma-separated URLs):", help="Provide a list of URLs to use as sources, separated by commas.")
173
- sources = [url.strip() for url in sources_input.split(',') if url.strip()]
174
- else:
175
- uploaded_files = st.file_uploader("Upload files for local research:", accept_multiple_files=True, help="Upload documents for the research.")
176
- sources = []
177
- if uploaded_files:
178
- os.makedirs("uploads", exist_ok=True)
179
- for uploaded_file in uploaded_files:
180
- file_path = os.path.join("uploads", uploaded_file.name)
181
- with open(file_path, "wb") as f:
182
- f.write(uploaded_file.getbuffer())
183
-
184
- if st.button("Run Research"):
185
- if not user_query:
186
- st.warning("Please enter a research query.")
187
- else:
188
- # Set the retriever environment variable (using Tavily in this case)
189
- os.environ['RETRIEVER'] = 'tavily'
190
-
191
- report_source = 'local' if research_type == "Document Research" else 'web'
192
 
193
- with st.spinner("Running research..."):
194
- # Run the research and get the report and logs using the final_query
195
- report, logs = asyncio.run(get_report(final_query, report_type, sources, report_source))
196
- st.session_state.report = report
197
- st.session_state.logs = logs
198
 
199
- # Display outputs in the main section
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
  if 'report' in st.session_state:
201
  st.markdown("### Research Report")
202
  st.markdown(st.session_state.report)
203
-
204
- # Create PDF
205
- pdf_path = "report.pdf"
206
- create_pdf(st.session_state.report, pdf_path)
207
-
208
- # Provide download link for the PDF
209
- with open(pdf_path, "rb") as pdf_file:
210
- st.download_button(
211
- label="Download report as PDF",
212
- data=pdf_file,
213
- file_name="report.pdf",
214
- mime="application/pdf"
215
- )
216
 
217
  st.markdown("### Agent Logs")
218
- if 'logs' in st.session_state:
219
- st.text_area("Logs will appear here during the research process:",
220
- value=st.session_state.logs,
221
- height=200,
222
- key=f"logs_{uuid.uuid4()}")
223
- else:
224
- st.text_area("Logs will appear here during the research process",
225
- height=200,
226
- key=f"logs_{uuid.uuid4()}")
227
 
228
- # Hide Streamlit's default footer and menu
229
- hide_streamlit_style = """
230
  <style>
231
  #MainMenu {visibility: hidden;}
232
  footer {visibility: hidden;}
233
  </style>
234
- """
235
- st.markdown(hide_streamlit_style, unsafe_allow_html=True)
 
 
1
  import os
2
+ import io
3
+ import uuid
4
  import asyncio
5
  import nest_asyncio
6
+ import importlib.metadata
7
+ import tempfile
8
+ from datetime import datetime
9
  from contextlib import redirect_stdout
10
+
11
+ import streamlit as st
12
  from fpdf import FPDF
13
+ from gpt_researcher import GPTResearcher
14
+
15
+ # ---------- sensible defaults to avoid KeyError in gpt_researcher ----------
16
+ os.environ.setdefault("LLM_PROVIDER", "openai")
17
+ os.environ.setdefault("EMBEDDING_PROVIDER", "openai")
18
+ os.environ.setdefault("EMBEDDING_MODEL", "text-embedding-3-small")
19
+ # you'll set RETRIEVER later to 'tavily'
20
 
21
+ # ---------- version printing (optional) ----------
22
  def get_version(package_name, module=None):
23
  try:
24
  if module and hasattr(module, '__version__'):
 
26
  else:
27
  version = importlib.metadata.version(package_name)
28
  print(f"{package_name} version: {version}")
29
+ except Exception:
30
+ pass
 
 
31
 
 
32
  get_version('streamlit', st)
33
  get_version('gpt_researcher')
34
  get_version('nest_asyncio', nest_asyncio)
35
  get_version('fpdf')
36
 
 
 
37
  print("\nStandard Library Modules:")
38
+ for lib in ['os','asyncio','contextlib','io','datetime','uuid','tempfile']:
39
+ print(f"{lib} is part of the Python Standard Library.")
 
 
40
 
41
+ # ---------- streamlit setup ----------
42
+ st.set_page_config(layout="wide")
 
 
43
  nest_asyncio.apply()
44
 
 
45
  openai_api_key = os.getenv("OPENAI_API_KEY")
46
  tavily_api_key = os.getenv("TAVILY_API_KEY")
 
 
47
  if not openai_api_key or not tavily_api_key:
48
+ st.error("API keys for OpenAI or Tavily are not set in the environment variables.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
+ # ---------- PDF helpers (in-memory, no filesystem writes) ----------
 
 
 
 
51
  class PDF(FPDF):
52
  def header(self):
53
  self.set_font("Arial", "B", 12)
 
58
  self.set_font("Arial", "I", 8)
59
  self.cell(0, 10, f"Page {self.page_no()}", 0, 0, "C")
60
 
61
def create_pdf_bytes(report_text: str) -> bytes:
    """Render *report_text* as a PDF and return the document as raw bytes.

    The PDF is built entirely in memory (nothing is written to disk) so the
    result can be handed straight to ``st.download_button``.

    Args:
        report_text: The report body. Each ``'\\n'``-separated line becomes
            one ``multi_cell`` paragraph. ``None`` or an empty string yields
            a PDF with a single empty paragraph.

    Returns:
        The serialized PDF document as ``bytes``.
    """
    pdf = PDF()
    pdf.add_page()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.set_font("Arial", size=12)

    for line in (report_text or "").split('\n'):
        # The font in use is latin-1 only: characters outside that range
        # are replaced with '?' instead of raising an encoding error.
        pdf.multi_cell(0, 10, line.encode('latin-1', 'replace').decode('latin-1'))

    rendered = pdf.output(dest='S')
    # Legacy PyFPDF returns a latin-1 ``str`` for dest='S', whereas fpdf2
    # returns a ``bytearray``; calling ``.encode`` on the latter would raise
    # AttributeError, so normalise both shapes to ``bytes``.
    if isinstance(rendered, str):
        return rendered.encode('latin-1')
    return bytes(rendered)
71
+
72
+ # ---------- async research ----------
73
async def get_report(
    query: str,
    report_type: str,
    sources: list,
    report_source: str,
    doc_dir: str,
    *,
    max_polls: int = 30,
    poll_interval: float = 1.0,
):
    """Run a GPTResearcher job and return the report with the captured logs.

    Args:
        query: The research question passed to ``GPTResearcher``.
        report_type: Report format identifier passed to ``GPTResearcher``.
        sources: URLs forwarded as ``source_urls`` for non-local research.
        report_source: ``'local'`` to research documents found in *doc_dir*;
            any other value runs URL/web based research.
        doc_dir: Directory exported through the ``DOC_PATH`` environment
            variable when *report_source* is ``'local'``.
        max_polls: Maximum number of times the captured output is checked
            for the completion marker before the report is written anyway.
        poll_interval: Seconds to sleep between two checks.

    Returns:
        A ``(report, logs)`` tuple: the value returned by
        ``researcher.write_report()`` and everything printed to stdout while
        the researcher was running.
    """
    captured = io.StringIO()

    # NOTE: redirect_stdout swaps sys.stdout for the whole process, so output
    # printed by anything else running during the awaits is captured as well.
    with redirect_stdout(captured):
        if report_source == 'local':
            # DOC_PATH tells gpt_researcher where to look for local documents.
            os.environ['DOC_PATH'] = doc_dir
            researcher = GPTResearcher(query=query, report_type=report_type, report_source='local')
        else:
            researcher = GPTResearcher(query=query, report_type=report_type, source_urls=sources)

        await researcher.conduct_research()

        # Give late log output a bounded amount of time to show up; stop as
        # soon as the completion marker has been printed.
        for _ in range(max_polls):
            if "Finalized research step" in captured.getvalue():
                break
            await asyncio.sleep(poll_interval)

        report = await researcher.write_report()

    return report, captured.getvalue()
94
+
95
+ # ---------- UI ----------
96
  st.title("GPT Researcher")
97
  st.markdown("""
98
  GPT Researcher is an autonomous agent designed for comprehensive online research tasks. It pulls information from the web or uploaded documents to create detailed, factual, research reports.
99
  """)
100
+
101
  with st.expander("Why Use GPT Researcher?", expanded=False):
102
  st.markdown("""
103
+ - **Objective and Unbiased**: Delivers accurate, factual information.
104
+ - **Time-Efficient**: Reduces manual research time.
105
+ - **Up-to-Date**: Minimizes outdated info and hallucinations.
106
+ - **Comprehensive**: Can produce long, detailed reports (2,000+ words).
107
+ - **Reduced Misinformation**: Considers multiple sources.
 
108
  """)
109
 
 
110
  st.markdown(
111
  """
112
  <style>
113
+ .big-green-font { font-size:20px !important; font-weight:bold; color: green; margin-bottom:-10px; }
114
+ .stTextInput > div > input { margin-top:-25px; }
 
 
 
 
 
 
 
115
  </style>
116
  """,
117
  unsafe_allow_html=True,
118
  )
119
 
120
  st.markdown('<p class="big-green-font">Enter your research query:</p>', unsafe_allow_html=True)
 
 
121
  default_query = "Why is the Stock Price of Nvidia Soaring?"
 
 
122
  user_query = st.text_input("", default_query, help="Type your research question or topic.")
123
 
 
124
  if user_query:
125
  current_date = datetime.now().strftime("%B %Y")
126
  final_query = f"{user_query} Current Date is {current_date}"
127
+ else:
128
+ final_query = None
129
 
130
  st.sidebar.title("Research Settings")
 
131
  with st.sidebar.expander("How to Use", expanded=False):
132
  st.markdown("""
133
+ 1. **Select Research Type**: Web or Document Research.
134
+ 2. **Enter Research Query**.
135
+ 3. **Choose Report Type**.
136
+ 4. **Provide Sources or Upload Files**.
137
+ 5. **Run Research** and download the PDF.
 
138
  """)
139
 
140
+ research_type = st.sidebar.selectbox("Select research type:", ["Web Research", "Document Research"])
141
+ report_type = st.sidebar.selectbox("Select report type:", ["research_report", "resource_list", "article_outline"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
 
143
+ # use a guaranteed-writable location
144
+ UPLOAD_DIR = os.path.join(tempfile.gettempdir(), "uploads")
145
+ os.makedirs(UPLOAD_DIR, exist_ok=True)
 
 
146
 
147
# Collect the research inputs: a list of URLs for web research, or uploaded
# documents persisted into UPLOAD_DIR for document research.
if research_type == "Web Research":
    sources_input = st.sidebar.text_area("Enter your sources (optional, comma-separated URLs):")
    # Split on commas and drop blank entries left by stray separators.
    sources = [u.strip() for u in sources_input.split(',') if u.strip()]
else:
    uploaded_files = st.sidebar.file_uploader("Upload files for local research:", accept_multiple_files=True)
    sources = []
    for uploaded_file in uploaded_files or []:
        # The filename is supplied by the client: keep only its final path
        # component so it cannot point outside UPLOAD_DIR.
        safe_name = os.path.basename(uploaded_file.name)
        if not safe_name:
            continue
        with open(os.path.join(UPLOAD_DIR, safe_name), "wb") as f:
            f.write(uploaded_file.getbuffer())
157
+
158
+ run_clicked = st.sidebar.button("Run Research")
159
+
160
+ if run_clicked:
161
+ if not final_query:
162
+ st.warning("Please enter a research query.")
163
+ else:
164
+ # set retriever
165
+ os.environ['RETRIEVER'] = 'tavily'
166
+ report_source = 'local' if research_type == "Document Research" else 'web'
167
+ with st.spinner("Running research..."):
168
+ report, logs = asyncio.run(get_report(final_query, report_type, sources, report_source, UPLOAD_DIR))
169
+ st.session_state.report = report
170
+ st.session_state.logs = logs
171
+
172
+ # ---------- outputs ----------
173
  if 'report' in st.session_state:
174
  st.markdown("### Research Report")
175
  st.markdown(st.session_state.report)
176
+
177
+ # in-memory PDF (no filesystem writes)
178
+ pdf_bytes = create_pdf_bytes(st.session_state.report)
179
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
180
+ st.download_button(
181
+ label="Download report as PDF",
182
+ data=pdf_bytes,
183
+ file_name=f"report_{timestamp}.pdf",
184
+ mime="application/pdf",
185
+ )
 
 
 
186
 
187
  st.markdown("### Agent Logs")
188
+ st.text_area(
189
+ "Logs will appear here during the research process:",
190
+ value=st.session_state.get('logs', ''),
191
+ height=200,
192
+ key=f"logs_{uuid.uuid4()}",
193
+ )
 
 
 
194
 
195
+ # Hide Streamlit UI chrome
196
+ st.markdown("""
197
  <style>
198
  #MainMenu {visibility: hidden;}
199
  footer {visibility: hidden;}
200
  </style>
201
+ """, unsafe_allow_html=True)