Spaces:

QuantumLearner
/

Space12

Sleeping

App Files Files Community

QuantumLearner committed on Aug 18, 2025

Commit

a89b0c7

verified ·

1 Parent(s): 4906b3d

Update app.py

Browse files

Files changed (1) hide show

app.py +114 -148

app.py CHANGED Viewed

@@ -1,16 +1,24 @@
-import streamlit as st
 import os
-from gpt_researcher import GPTResearcher
 import asyncio
 import nest_asyncio
 from contextlib import redirect_stdout
-import io
 from fpdf import FPDF
-from datetime import datetime
-import uuid
-import importlib.metadata
-# For third-party libraries
 def get_version(package_name, module=None):
     try:
         if module and hasattr(module, '__version__'):
@@ -18,71 +26,28 @@ def get_version(package_name, module=None):
         else:
             version = importlib.metadata.version(package_name)
         print(f"{package_name} version: {version}")
-    except AttributeError:
-        print(f"{package_name} does not have a __version__ attribute.")
-    except importlib.metadata.PackageNotFoundError:
-        print(f"{package_name} is not installed.")
-# Check versions
 get_version('streamlit', st)
 get_version('gpt_researcher')
 get_version('nest_asyncio', nest_asyncio)
 get_version('fpdf')
-# For standard library modules
-standard_libs = ['os', 'asyncio', 'contextlib', 'io', 'datetime', 'uuid']
 print("\nStandard Library Modules:")
-for lib in standard_libs:
-    print(f"{lib} is part of the Python Standard Library and does not have a separate version number.")
-# Apply nest_asyncio for asyncio support in Streamlit
 nest_asyncio.apply()
-# Load API keys from environment variables
 openai_api_key = os.getenv("OPENAI_API_KEY")
 tavily_api_key = os.getenv("TAVILY_API_KEY")
-# Check if the API keys are available
 if not openai_api_key or not tavily_api_key:
-    st.error("API keys for OpenAI or Tavily are not set in the environment variables. Please set them before running the app.")
-# Define the asynchronous function to get the report and capture logs
-async def get_report(query: str, report_type: str, sources: list, report_source: str):
-    f = io.StringIO()
-    unique_key = str(uuid.uuid4())  # Generate a unique key for this run
-    with redirect_stdout(f):
-        if report_source == 'local':
-            # Set the DOC_PATH environment variable
-            os.environ['DOC_PATH'] = './uploads'
-            researcher = GPTResearcher(query=query, report_type=report_type, report_source=report_source)
-        else:
-            researcher = GPTResearcher(query=query, report_type=report_type, source_urls=sources)
-        await researcher.conduct_research()
-        max_attempts = 30  # Prevent infinite loop
-        attempts = 0
-        while attempts < max_attempts:
-            logs = f.getvalue()
-            # Break condition
-            if "Finalized research step" in logs:
-                break
-            await asyncio.sleep(1)  # Update every second
-            attempts += 1
-        report = await researcher.write_report()
-    return report, logs
-# Function to create PDF using fpdf with UTF-8 encoding
 class PDF(FPDF):
     def header(self):
         self.set_font("Arial", "B", 12)
@@ -93,143 +58,144 @@ class PDF(FPDF):
         self.set_font("Arial", "I", 8)
         self.cell(0, 10, f"Page {self.page_no()}", 0, 0, "C")
-def create_pdf(report_text, pdf_path):
     pdf = PDF()
     pdf.add_page()
     pdf.set_auto_page_break(auto=True, margin=15)
     pdf.set_font("Arial", size=12)
     for line in report_text.split('\n'):
         pdf.multi_cell(0, 10, line.encode('latin-1', 'replace').decode('latin-1'))
-    pdf.output(pdf_path, 'F')
-# Streamlit interface
-st.set_page_config(layout="wide")
 st.title("GPT Researcher")
 st.markdown("""
 GPT Researcher is an autonomous agent designed for comprehensive online research tasks. It pulls information from the web or uploaded documents to create detailed, factual, research reports.
 """)
 with st.expander("Why Use GPT Researcher?", expanded=False):
     st.markdown("""
-    - **Objective and Unbiased**: GPT Researcher focuses on delivering accurate and factual information without bias.
-    - **Time-Efficient**: It significantly reduces the time required for manual research tasks.
-    - **Up-to-Date Information**: Unlike traditional LLMs, GPT Researcher avoids outdated information and minimizes the risk of hallucinations.
-    - **Comprehensive Reports**: Capable of producing long, detailed research reports (2,000+ words).
-    - **Reduced Misinformation**: By considering a wide range of sources, it minimizes the risks associated with limited or biased information.
-    For more details, visit the [GPT Researcher GitHub repository](https://github.com/assafelovic/gpt-researcher).
     """)
-# Custom CSS for styling the input label
 st.markdown(
     """
     <style>
-    .big-green-font {
-        font-size:20px !important;
-        font-weight: bold;
-        color: green;
-        margin-bottom: -10px;
-    }
-    .stTextInput > div > input {
-        margin-top: -25px;
-    }
     </style>
     """,
     unsafe_allow_html=True,
 )
 st.markdown('<p class="big-green-font">Enter your research query:</p>', unsafe_allow_html=True)
-# Default query with current context
 default_query = "Why is the Stock Price of Nvidia Soaring?"
-# Display the input field for the user
 user_query = st.text_input("", default_query, help="Type your research question or topic.")
-# Process the query to include the current date after the user inputs their query
 if user_query:
     current_date = datetime.now().strftime("%B %Y")
     final_query = f"{user_query} Current Date is {current_date}"
 st.sidebar.title("Research Settings")
 with st.sidebar.expander("How to Use", expanded=False):
     st.markdown("""
-    ### How to Use
-    1. **Select Research Type**: Choose between Web Research and Document Research.
-    2. **Enter Research Query**: Type in your research question or topic.
-    3. **Choose Report Type**: Select the format of the report you want (research report, resource list, or article outline).
-    4. **Provide Sources or Upload Files**: For Web Research, you can enter URLs. For Document Research, upload the necessary files.
-    5. **Run Research**: Click the "Run Research" button to start. The logs will update in real-time, and the final report will be displayed and available for download as a PDF.
     """)
-with st.sidebar:
-    research_type = st.selectbox("Select research type:", ["Web Research", "Document Research"], help="Choose between web-based research or research from local documents.")
-    report_type = st.selectbox("Select report type:", ["research_report", "resource_list", "article_outline"], help="Choose the format of the final report.")
-    if research_type == "Web Research":
-        sources_input = st.text_area("Enter your sources (optional, comma-separated URLs):", help="Provide a list of URLs to use as sources, separated by commas.")
-        sources = [url.strip() for url in sources_input.split(',') if url.strip()]
-    else:
-        uploaded_files = st.file_uploader("Upload files for local research:", accept_multiple_files=True, help="Upload documents for the research.")
-        sources = []
-        if uploaded_files:
-            os.makedirs("uploads", exist_ok=True)
-            for uploaded_file in uploaded_files:
-                file_path = os.path.join("uploads", uploaded_file.name)
-                with open(file_path, "wb") as f:
-                    f.write(uploaded_file.getbuffer())
-    if st.button("Run Research"):
-        if not user_query:
-            st.warning("Please enter a research query.")
-        else:
-            # Set the retriever environment variable (using Tavily in this case)
-            os.environ['RETRIEVER'] = 'tavily'
-            report_source = 'local' if research_type == "Document Research" else 'web'
-            with st.spinner("Running research..."):
-                # Run the research and get the report and logs using the final_query
-                report, logs = asyncio.run(get_report(final_query, report_type, sources, report_source))
-                st.session_state.report = report
-                st.session_state.logs = logs
-# Display outputs in the main section
 if 'report' in st.session_state:
     st.markdown("### Research Report")
     st.markdown(st.session_state.report)
-    # Create PDF
-    pdf_path = "report.pdf"
-    create_pdf(st.session_state.report, pdf_path)
-    # Provide download link for the PDF
-    with open(pdf_path, "rb") as pdf_file:
-        st.download_button(
-            label="Download report as PDF",
-            data=pdf_file,
-            file_name="report.pdf",
-            mime="application/pdf"
-        )
 st.markdown("### Agent Logs")
-if 'logs' in st.session_state:
-    st.text_area("Logs will appear here during the research process:",
-                 value=st.session_state.logs,
-                 height=200,
-                 key=f"logs_{uuid.uuid4()}")
-else:
-    st.text_area("Logs will appear here during the research process",
-                 height=200,
-                 key=f"logs_{uuid.uuid4()}")
-# Hide Streamlit's default footer and menu
-hide_streamlit_style = """
 <style>
 #MainMenu {visibility: hidden;}
 footer {visibility: hidden;}
 </style>
-"""
-st.markdown(hide_streamlit_style, unsafe_allow_html=True)

 import os
+import io
+import uuid
 import asyncio
 import nest_asyncio
+import importlib.metadata
+import tempfile
+from datetime import datetime
 from contextlib import redirect_stdout
+import streamlit as st
 from fpdf import FPDF
+from gpt_researcher import GPTResearcher
+# ---------- sensible defaults to avoid KeyError in gpt_researcher ----------
+os.environ.setdefault("LLM_PROVIDER", "openai")
+os.environ.setdefault("EMBEDDING_PROVIDER", "openai")
+os.environ.setdefault("EMBEDDING_MODEL", "text-embedding-3-small")
+# you'll set RETRIEVER later to 'tavily'
+# ---------- version printing (optional) ----------
 def get_version(package_name, module=None):
     try:
         if module and hasattr(module, '__version__'):
         else:
             version = importlib.metadata.version(package_name)
         print(f"{package_name} version: {version}")
+    except Exception:
+        pass
 get_version('streamlit', st)
 get_version('gpt_researcher')
 get_version('nest_asyncio', nest_asyncio)
 get_version('fpdf')
 print("\nStandard Library Modules:")
+for lib in ['os','asyncio','contextlib','io','datetime','uuid','tempfile']:
+    print(f"{lib} is part of the Python Standard Library.")
+# ---------- streamlit setup ----------
+st.set_page_config(layout="wide")
 nest_asyncio.apply()
 openai_api_key = os.getenv("OPENAI_API_KEY")
 tavily_api_key = os.getenv("TAVILY_API_KEY")
 if not openai_api_key or not tavily_api_key:
+    st.error("API keys for OpenAI or Tavily are not set in the environment variables.")
+# ---------- PDF helpers (in-memory, no filesystem writes) ----------
 class PDF(FPDF):
     def header(self):
         self.set_font("Arial", "B", 12)
         self.set_font("Arial", "I", 8)
         self.cell(0, 10, f"Page {self.page_no()}", 0, 0, "C")
+def create_pdf_bytes(report_text: str) -> bytes:
+    """Render *report_text* into a PDF and return the document as bytes.
+
+    The text is split on newlines and each line is written with
+    ``multi_cell``. Characters that cannot be encoded as latin-1 are
+    replaced, because the font selected here is addressed through latin-1.
+
+    Args:
+        report_text: The report body to render, one paragraph per line.
+
+    Returns:
+        The complete PDF document as ``bytes``, built in memory.
+    """
     pdf = PDF()
     pdf.add_page()
     pdf.set_auto_page_break(auto=True, margin=15)
     pdf.set_font("Arial", size=12)
     for line in report_text.split('\n'):
+        # keep compatibility with latin-1 fonts
         pdf.multi_cell(0, 10, line.encode('latin-1', 'replace').decode('latin-1'))
+    # The `fpdf` import can resolve to legacy PyFPDF (dest='S' returns a
+    # latin-1 str) or to fpdf2 (returns a bytearray). Calling .encode() on a
+    # bytearray raises AttributeError, so normalise both shapes to bytes.
+    rendered = pdf.output(dest='S')
+    if isinstance(rendered, str):
+        return rendered.encode('latin-1')
+    return bytes(rendered)
+# ---------- async research ----------
+async def get_report(query: str, report_type: str, sources: list, report_source: str, doc_dir: str):
+    """Run a GPTResearcher job and return its report with the captured stdout.
+
+    Everything printed while the researcher is built, while it researches
+    and while it writes the report is redirected into an in-memory buffer.
+
+    Args:
+        query: Research question passed to GPTResearcher.
+        report_type: Report format passed to GPTResearcher.
+        sources: URLs passed as ``source_urls`` for non-local research.
+        report_source: ``'local'`` selects document research; any other
+            value selects the URL-based path.
+        doc_dir: Directory exported as ``DOC_PATH`` for local research.
+
+    Returns:
+        A ``(report, logs)`` tuple, where ``logs`` is the captured stdout text.
+    """
+    captured = io.StringIO()
+    with redirect_stdout(captured):
+        if report_source != 'local':
+            researcher = GPTResearcher(query=query, report_type=report_type, source_urls=sources)
+        else:
+            # DOC_PATH is set before the researcher is constructed;
+            # presumably gpt_researcher reads it to locate local documents.
+            os.environ['DOC_PATH'] = doc_dir
+            researcher = GPTResearcher(query=query, report_type=report_type, report_source='local')
+        await researcher.conduct_research()
+        # Poll the captured output once per second, at most 30 times, until
+        # the completion marker shows up in the logs.
+        polls_left = 30
+        while polls_left > 0 and "Finalized research step" not in captured.getvalue():
+            await asyncio.sleep(1)
+            polls_left -= 1
+        report = await researcher.write_report()
+    return report, captured.getvalue()
+# ---------- UI ----------
 st.title("GPT Researcher")
 st.markdown("""
 GPT Researcher is an autonomous agent designed for comprehensive online research tasks. It pulls information from the web or uploaded documents to create detailed, factual, research reports.
 """)
 with st.expander("Why Use GPT Researcher?", expanded=False):
     st.markdown("""
+- **Objective and Unbiased**: Delivers accurate, factual information.
+- **Time-Efficient**: Reduces manual research time.
+- **Up-to-Date**: Minimizes outdated info and hallucinations.
+- **Comprehensive**: Can produce long, detailed reports (2,000+ words).
+- **Reduced Misinformation**: Considers multiple sources.
     """)
 st.markdown(
     """
     <style>
+    .big-green-font { font-size:20px !important; font-weight:bold; color: green; margin-bottom:-10px; }
+    .stTextInput > div > input { margin-top:-25px; }
     </style>
     """,
     unsafe_allow_html=True,
 )
 st.markdown('<p class="big-green-font">Enter your research query:</p>', unsafe_allow_html=True)
 default_query = "Why is the Stock Price of Nvidia Soaring?"
 user_query = st.text_input("", default_query, help="Type your research question or topic.")
 if user_query:
     current_date = datetime.now().strftime("%B %Y")
     final_query = f"{user_query} Current Date is {current_date}"
+else:
+    final_query = None
 st.sidebar.title("Research Settings")
 with st.sidebar.expander("How to Use", expanded=False):
     st.markdown("""
+1. **Select Research Type**: Web or Document Research.
+2. **Enter Research Query**.
+3. **Choose Report Type**.
+4. **Provide Sources or Upload Files**.
+5. **Run Research** and download the PDF.
     """)
+research_type = st.sidebar.selectbox("Select research type:", ["Web Research", "Document Research"])
+report_type = st.sidebar.selectbox("Select report type:", ["research_report", "resource_list", "article_outline"])
+# use a guaranteed-writable location
+UPLOAD_DIR = os.path.join(tempfile.gettempdir(), "uploads")
+os.makedirs(UPLOAD_DIR, exist_ok=True)
+# Collect the research inputs: a list of source URLs for web research, or
+# uploaded documents saved into UPLOAD_DIR for document research.
+if research_type == "Web Research":
+    sources_input = st.sidebar.text_area("Enter your sources (optional, comma-separated URLs):")
+    sources = [u.strip() for u in sources_input.split(',') if u.strip()]
+else:
+    uploaded_files = st.sidebar.file_uploader("Upload files for local research:", accept_multiple_files=True)
+    sources = []
+    if uploaded_files:
+        for uploaded_file in uploaded_files:
+            # The file name is supplied by the client. Keep only its final
+            # path component so a name such as "../../app.py" cannot be
+            # written outside UPLOAD_DIR.
+            safe_name = os.path.basename(uploaded_file.name)
+            if safe_name in ("", ".", ".."):
+                continue
+            with open(os.path.join(UPLOAD_DIR, safe_name), "wb") as f:
+                f.write(uploaded_file.getbuffer())
+run_clicked = st.sidebar.button("Run Research")
+if run_clicked:
+    if not final_query:
+        st.warning("Please enter a research query.")
+    else:
+        # set retriever
+        os.environ['RETRIEVER'] = 'tavily'
+        report_source = 'local' if research_type == "Document Research" else 'web'
+        with st.spinner("Running research..."):
+            report, logs = asyncio.run(get_report(final_query, report_type, sources, report_source, UPLOAD_DIR))
+            st.session_state.report = report
+            st.session_state.logs = logs
+# ---------- outputs ----------
 if 'report' in st.session_state:
     st.markdown("### Research Report")
     st.markdown(st.session_state.report)
+    # in-memory PDF (no filesystem writes)
+    pdf_bytes = create_pdf_bytes(st.session_state.report)
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    st.download_button(
+        label="Download report as PDF",
+        data=pdf_bytes,
+        file_name=f"report_{timestamp}.pdf",
+        mime="application/pdf",
+    )
 st.markdown("### Agent Logs")
+st.text_area(
+    "Logs will appear here during the research process:",
+    value=st.session_state.get('logs', ''),
+    height=200,
+    key=f"logs_{uuid.uuid4()}",
+)
+# Hide Streamlit UI chrome
+st.markdown("""
 <style>
 #MainMenu {visibility: hidden;}
 footer {visibility: hidden;}
 </style>
+""", unsafe_allow_html=True)