QuantumLearner committed on
Commit
765eafe
·
verified ·
1 Parent(s): f932c00

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +223 -106
app.py CHANGED
@@ -2,187 +2,304 @@ import os
2
  import io
3
  import uuid
4
  import asyncio
5
- import nest_asyncio
6
- import importlib.metadata
7
- import tempfile
8
  from datetime import datetime
9
- from contextlib import redirect_stdout, redirect_stderr
10
 
11
  import streamlit as st
 
12
  from fpdf import FPDF
13
  from gpt_researcher import GPTResearcher
14
 
15
- # ---------- sensible defaults for gpt_researcher env ----------
 
 
 
 
 
 
16
  os.environ.setdefault("LLM_PROVIDER", "openai")
17
  os.environ.setdefault("EMBEDDING_PROVIDER", "openai")
18
  os.environ.setdefault("EMBEDDING_MODEL", "text-embedding-3-small")
 
 
 
 
 
 
 
19
 
20
- # ---------- streamlit base ----------
21
- st.set_page_config(layout="wide")
22
  nest_asyncio.apply()
23
 
24
- # API keys
25
- openai_api_key = os.getenv("OPENAI_API_KEY")
26
- tavily_api_key = os.getenv("TAVILY_API_KEY")
27
- if not openai_api_key or not tavily_api_key:
28
- st.error("API keys for OpenAI or Tavily are not set in the environment variables.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
- # ---------- PDF helpers (in-memory only) ----------
31
  class PDF(FPDF):
32
  def header(self):
33
  self.set_font("Arial", "B", 12)
34
  self.cell(0, 10, "Research Report", 0, 1, "C")
 
35
  def footer(self):
36
  self.set_y(-15)
37
  self.set_font("Arial", "I", 8)
38
  self.cell(0, 10, f"Page {self.page_no()}", 0, 0, "C")
39
 
40
- def create_pdf_bytes(report_text: str) -> bytes:
 
 
41
  pdf = PDF()
42
  pdf.add_page()
43
  pdf.set_auto_page_break(auto=True, margin=15)
44
  pdf.set_font("Arial", size=12)
 
45
  for line in report_text.split("\n"):
46
  pdf.multi_cell(0, 10, line.encode("latin-1", "replace").decode("latin-1"))
47
- return pdf.output(dest="S").encode("latin-1")
 
48
 
49
- # ---------- live research with streaming logs (single placeholder, no keys) ----------
50
- async def run_research_streaming(query: str, report_type: str, sources: list, report_source: str, doc_dir: str, logs_placeholder):
 
 
 
 
 
 
 
 
 
51
  buf = io.StringIO()
52
 
53
- # Build researcher
54
- if report_source == "local":
55
- os.environ["DOC_PATH"] = doc_dir
56
- researcher = GPTResearcher(query=query, report_type=report_type, report_source="local")
57
- else:
58
- researcher = GPTResearcher(query=query, report_type=report_type, source_urls=sources)
59
-
60
- # Create an initial visible block for logs (single widget, then we just overwrite it)
61
- logs_placeholder.code("Starting…")
62
 
63
- with redirect_stdout(buf), redirect_stderr(buf):
64
  task = asyncio.create_task(researcher.conduct_research())
65
 
 
66
  while not task.done():
67
- logs = buf.getvalue()
68
- logs_placeholder.code(logs if logs else "Starting…")
69
- await asyncio.sleep(1)
70
 
71
- # ensure final prints are shown
72
  await task
73
- final_logs = buf.getvalue()
74
- logs_placeholder.code(final_logs if final_logs else "Done.")
75
 
76
- # Now write the report
77
- report = await researcher.write_report()
 
 
 
 
 
 
78
 
79
- return report, final_logs
80
 
81
- # ---------- UI ----------
 
 
82
  st.title("GPT Researcher")
83
- st.markdown("""
84
- GPT Researcher is an autonomous agent designed for comprehensive online research tasks. It pulls information from the web or uploaded documents to create detailed, factual research reports.
85
- """)
 
 
 
86
 
87
  with st.expander("Why Use GPT Researcher?", expanded=False):
88
- st.markdown("""
89
- - **Objective & Unbiased**
90
- - **Time-Efficient**
91
- - **Up-to-Date**
92
- - **Comprehensive (2,000+ words)**
93
- - **Reduced Misinformation**
94
- """)
 
95
 
 
96
  st.markdown(
97
  """
98
  <style>
99
- .big-green-font { font-size:20px !important; font-weight:bold; color: green; margin-bottom:-10px; }
100
  .stTextInput > div > input { margin-top:-25px; }
101
  </style>
102
  """,
103
  unsafe_allow_html=True,
104
  )
105
- st.markdown('<p class="big-green-font">Enter your research query:</p>', unsafe_allow_html=True)
106
 
 
107
  default_query = "Why is the Stock Price of Nvidia Soaring?"
108
  user_query = st.text_input("", default_query, help="Type your research question or topic.")
109
- final_query = f"{user_query} Current Date is {datetime.now().strftime('%B %Y')}" if user_query else None
 
 
110
 
111
  st.sidebar.title("Research Settings")
 
112
  with st.sidebar.expander("How to Use", expanded=False):
113
- st.markdown("""
114
- 1. **Select Research Type** (Web/Document).
115
- 2. **Enter Research Query**.
116
- 3. **Choose Report Type**.
117
- 4. **Add URLs or Upload Files**.
118
- 5. **Run Research** — watch live logs, then download the PDF.
119
- """)
 
120
 
121
- research_type = st.sidebar.selectbox("Select research type:", ["Web Research", "Document Research"])
122
- report_type = st.sidebar.selectbox("Select report type:", ["research_report", "resource_list", "article_outline"])
 
 
 
 
 
 
 
 
123
 
124
- # always-writable uploads dir
125
- UPLOAD_DIR = os.path.join(tempfile.gettempdir(), "uploads")
126
- os.makedirs(UPLOAD_DIR, exist_ok=True)
 
 
 
 
 
 
 
 
 
 
 
127
 
 
 
128
  if research_type == "Web Research":
129
- sources_input = st.sidebar.text_area("Enter your sources (optional, comma-separated URLs):")
130
- sources = [u.strip() for u in sources_input.split(",") if u.strip()]
 
 
 
 
131
  else:
132
- uploaded_files = st.sidebar.file_uploader("Upload files for local research:", accept_multiple_files=True)
133
- sources = []
 
 
 
134
  if uploaded_files:
135
- for f in uploaded_files:
136
- with open(os.path.join(UPLOAD_DIR, f.name), "wb") as out:
137
- out.write(f.getbuffer())
 
 
138
 
139
- run_clicked = st.sidebar.button("Run Research")
140
 
141
- # stable placeholders
142
- st.markdown("### Agent Logs")
143
- logs_placeholder = st.empty()
144
- report_placeholder = st.empty()
145
- download_placeholder = st.empty()
146
 
 
 
 
147
  if run_clicked:
148
- if not final_query:
149
  st.warning("Please enter a research query.")
150
  else:
 
151
  os.environ["RETRIEVER"] = "tavily"
152
- src = "local" if research_type == "Document Research" else "web"
153
 
154
- with st.spinner("Running research..."):
155
- report, logs = asyncio.run(
 
 
 
 
 
 
 
 
 
 
 
156
  run_research_streaming(
157
- final_query, report_type, sources, src, UPLOAD_DIR, logs_placeholder
 
 
 
 
158
  )
159
  )
160
- # persist
161
- st.session_state.report = report
162
- st.session_state.logs = logs
163
 
164
- # Render results if available (e.g., after rerun)
 
 
 
 
 
 
165
  if "report" in st.session_state:
166
- report_placeholder.markdown("### Research Report")
167
- report_placeholder.markdown(st.session_state.report)
168
-
169
- pdf_bytes = create_pdf_bytes(st.session_state.report)
170
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
171
- download_placeholder.download_button(
172
- label="Download report as PDF",
173
- data=pdf_bytes,
174
- file_name=f"report_{timestamp}.pdf",
175
- mime="application/pdf",
176
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
177
 
178
- # Keep last logs visible after run / rerun
179
- if "logs" in st.session_state:
180
- logs_placeholder.code(st.session_state.logs)
181
-
182
- # Hide Streamlit chrome
183
- st.markdown("""
184
- <style>
185
- #MainMenu {visibility: hidden;}
186
- footer {visibility: hidden;}
187
- </style>
188
- """, unsafe_allow_html=True)
 
2
  import io
3
  import uuid
4
  import asyncio
 
 
 
5
  from datetime import datetime
6
+ from contextlib import redirect_stdout
7
 
8
  import streamlit as st
9
+ import nest_asyncio
10
  from fpdf import FPDF
11
  from gpt_researcher import GPTResearcher
12
 
13
+
14
+ # -------------------------
15
+ # Page & global configuration
16
+ # -------------------------
17
+ st.set_page_config(layout="wide", page_title="GPT Researcher")
18
+
19
+ # Providers & models — set safe defaults to avoid `o1-preview`
20
  os.environ.setdefault("LLM_PROVIDER", "openai")
21
  os.environ.setdefault("EMBEDDING_PROVIDER", "openai")
22
  os.environ.setdefault("EMBEDDING_MODEL", "text-embedding-3-small")
23
+ os.environ.setdefault("STRATEGIC_LLM", "gpt-4o")
24
+ os.environ.setdefault("SMART_LLM", "gpt-4o-mini")
25
+ # Compatibility aliases some versions of gpt_researcher read
26
+ os.environ.setdefault("STRATEGIC_MODEL", os.environ["STRATEGIC_LLM"])
27
+ os.environ.setdefault("SMART_MODEL", os.environ["SMART_LLM"])
28
+ os.environ.setdefault("STRATEGY_LLM", os.environ["STRATEGIC_LLM"])
29
+ os.environ.setdefault("STRATEGY_MODEL", os.environ["STRATEGIC_LLM"])
30
 
31
+ # Allow asyncio.run inside Streamlit
 
32
  nest_asyncio.apply()
33
 
34
+ # -------------------------
35
+ # Small helpers
36
+ # -------------------------
37
+ def _apply_model_env(strategic_model: str, smart_model: str):
38
+ """Apply model choices to environment for gpt_researcher."""
39
+ for k in ("STRATEGIC_LLM", "STRATEGIC_MODEL", "STRATEGY_LLM", "STRATEGY_MODEL"):
40
+ os.environ[k] = strategic_model
41
+ for k in ("SMART_LLM", "SMART_MODEL"):
42
+ os.environ[k] = smart_model
43
+
44
+ def _clean_logs(text: str) -> str:
45
+ """Optionally hide noisy lines about unavailable models, keep everything else."""
46
+ if not text:
47
+ return text
48
+ bad_bits = [
49
+ "The model `o1-preview` does not exist",
50
+ "`o1-preview` does not exist",
51
+ "model_not_found",
52
+ ]
53
+ lines = []
54
+ for line in text.splitlines():
55
+ if any(b in line for b in bad_bits):
56
+ continue
57
+ lines.append(line)
58
+ return "\n".join(lines)
59
 
 
60
  class PDF(FPDF):
61
  def header(self):
62
  self.set_font("Arial", "B", 12)
63
  self.cell(0, 10, "Research Report", 0, 1, "C")
64
+
65
  def footer(self):
66
  self.set_y(-15)
67
  self.set_font("Arial", "I", 8)
68
  self.cell(0, 10, f"Page {self.page_no()}", 0, 0, "C")
69
 
70
+ def create_pdf(report_text: str) -> str:
71
+ """Write PDF to a unique, writable temp path and return the path."""
72
+ pdf_path = f"/tmp/research_report_{uuid.uuid4().hex}.pdf"
73
  pdf = PDF()
74
  pdf.add_page()
75
  pdf.set_auto_page_break(auto=True, margin=15)
76
  pdf.set_font("Arial", size=12)
77
+ # FPDF is Latin-1; degrade gracefully
78
  for line in report_text.split("\n"):
79
  pdf.multi_cell(0, 10, line.encode("latin-1", "replace").decode("latin-1"))
80
+ pdf.output(pdf_path, "F")
81
+ return pdf_path
82
 
83
+ async def run_research_streaming(
84
+ query: str,
85
+ report_type: str,
86
+ report_source: str,
87
+ sources: list,
88
+ logs_placeholder
89
+ ):
90
+ """
91
+ Run research and stream stdout to the provided placeholder.
92
+ Returns (report_text, final_logs).
93
+ """
94
  buf = io.StringIO()
95
 
96
+ with redirect_stdout(buf):
97
+ # For local/doc research, set DOC_PATH and ensure it exists
98
+ if report_source == "local":
99
+ os.environ["DOC_PATH"] = "./uploads"
100
+ os.makedirs("uploads", exist_ok=True)
101
+ researcher = GPTResearcher(query=query, report_type=report_type, report_source=report_source)
102
+ else:
103
+ researcher = GPTResearcher(query=query, report_type=report_type, source_urls=sources)
 
104
 
105
+ # Kick off the task so we can poll logs while it runs
106
  task = asyncio.create_task(researcher.conduct_research())
107
 
108
+ # Stream logs while the task runs
109
  while not task.done():
110
+ await asyncio.sleep(0.5)
111
+ logs_placeholder.code(_clean_logs(buf.getvalue()) or "Starting…")
 
112
 
113
+ # Ensure exceptions are raised if any
114
  await task
 
 
115
 
116
+ # One final refresh of logs after conduct_research finishes
117
+ logs_placeholder.code(_clean_logs(buf.getvalue()) or "Finalizing…")
118
+
119
+ # Write the report
120
+ report_text = await researcher.write_report()
121
+
122
+ final_logs = buf.getvalue()
123
+ return report_text, final_logs
124
 
 
125
 
126
+ # -------------------------
127
+ # UI
128
+ # -------------------------
129
  st.title("GPT Researcher")
130
+ st.markdown(
131
+ """
132
+ GPT Researcher is an autonomous agent for comprehensive online or local-document research,
133
+ producing detailed, factual reports.
134
+ """
135
+ )
136
 
137
  with st.expander("Why Use GPT Researcher?", expanded=False):
138
+ st.markdown(
139
+ """
140
+ - **Objective & Factual**: Focused on accurate information.
141
+ - **Time-Efficient**: Automates the heavy lifting of research.
142
+ - **Up-to-Date**: Pulls from the web or your uploaded files.
143
+ - **Long-Form Reports**: Capable of 2,000+ word outputs.
144
+ """
145
+ )
146
 
147
+ # Label styling
148
  st.markdown(
149
  """
150
  <style>
151
+ .big-green-font { font-size:20px !important; font-weight:bold; color:green; margin-bottom:-10px; }
152
  .stTextInput > div > input { margin-top:-25px; }
153
  </style>
154
  """,
155
  unsafe_allow_html=True,
156
  )
 
157
 
158
+ st.markdown('<p class="big-green-font">Enter your research query:</p>', unsafe_allow_html=True)
159
  default_query = "Why is the Stock Price of Nvidia Soaring?"
160
  user_query = st.text_input("", default_query, help="Type your research question or topic.")
161
+
162
+ current_date = datetime.now().strftime("%B %Y")
163
+ final_query = f"{user_query} Current Date is {current_date}" if user_query else ""
164
 
165
  st.sidebar.title("Research Settings")
166
+
167
  with st.sidebar.expander("How to Use", expanded=False):
168
+ st.markdown(
169
+ """
170
+ 1. Choose **Web** or **Document** research.
171
+ 2. Enter your **query** and pick **report type**.
172
+ 3. Provide URLs **or** upload files (for document research).
173
+ 4. Click **Run Research** — logs stream live; final report + PDF download appear at the end.
174
+ """
175
+ )
176
 
177
+ research_type = st.sidebar.selectbox(
178
+ "Select research type:",
179
+ ["Web Research", "Document Research"],
180
+ help="Choose between web-based research or research from local documents.",
181
+ )
182
+ report_type = st.sidebar.selectbox(
183
+ "Select report type:",
184
+ ["research_report", "resource_list", "article_outline"],
185
+ help="Choose the format of the final report.",
186
+ )
187
 
188
+ # Model choices (so you never hit `o1-preview`)
189
+ with st.sidebar.expander("Model Settings", expanded=False):
190
+ strategic_choice = st.selectbox(
191
+ "Strategic model",
192
+ ["gpt-4o", "gpt-4o-mini"],
193
+ index=0,
194
+ help="Planning/analysis model used by the agent.",
195
+ )
196
+ smart_choice = st.selectbox(
197
+ "Smart model",
198
+ ["gpt-4o-mini", "gpt-4o"],
199
+ index=0,
200
+ help="Cheaper/faster model used by the agent.",
201
+ )
202
 
203
+ # Source inputs
204
+ sources = []
205
  if research_type == "Web Research":
206
+ sources_input = st.sidebar.text_area(
207
+ "Enter your sources (optional, comma-separated URLs):",
208
+ help="Provide a list of URLs separated by commas.",
209
+ )
210
+ if sources_input:
211
+ sources = [u.strip() for u in sources_input.split(",") if u.strip()]
212
  else:
213
+ uploaded_files = st.sidebar.file_uploader(
214
+ "Upload files for local research:",
215
+ accept_multiple_files=True,
216
+ help="Upload documents to analyze.",
217
+ )
218
  if uploaded_files:
219
+ os.makedirs("uploads", exist_ok=True)
220
+ for up in uploaded_files:
221
+ fp = os.path.join("uploads", up.name)
222
+ with open(fp, "wb") as f:
223
+ f.write(up.getbuffer())
224
 
225
+ run_clicked = st.sidebar.button("Run Research", type="primary")
226
 
227
+ # Warn if API keys are missing
228
+ if not os.getenv("OPENAI_API_KEY") or not os.getenv("TAVILY_API_KEY"):
229
+ st.error("OPENAI_API_KEY or TAVILY_API_KEY is not set in environment variables.")
 
 
230
 
231
+ # -------------------------
232
+ # Run the agent (with live logs)
233
+ # -------------------------
234
  if run_clicked:
235
+ if not user_query:
236
  st.warning("Please enter a research query.")
237
  else:
238
+ # Retriever back-end (Tavily)
239
  os.environ["RETRIEVER"] = "tavily"
 
240
 
241
+ # Apply model selections so gpt_researcher never tries `o1-preview`
242
+ _apply_model_env(strategic_choice, smart_choice)
243
+
244
+ # Decide the report source
245
+ report_source = "local" if research_type == "Document Research" else "web"
246
+
247
+ # Live logs area
248
+ st.subheader("Agent Logs (live)")
249
+ live_logs_placeholder = st.empty()
250
+
251
+ with st.spinner("Running research…"):
252
+ # Stream logs while running
253
+ report_text, final_logs = asyncio.run(
254
  run_research_streaming(
255
+ query=final_query,
256
+ report_type=report_type,
257
+ report_source=report_source,
258
+ sources=sources,
259
+ logs_placeholder=live_logs_placeholder,
260
  )
261
  )
 
 
 
262
 
263
+ # Persist results
264
+ st.session_state["report"] = report_text
265
+ st.session_state["logs"] = final_logs
266
+
267
+ # -------------------------
268
+ # Show results (if any)
269
+ # -------------------------
270
  if "report" in st.session_state:
271
+ st.markdown("### Research Report")
272
+ st.markdown(st.session_state["report"])
273
+
274
+ # Create & offer PDF download
275
+ try:
276
+ pdf_path = create_pdf(st.session_state["report"])
277
+ with open(pdf_path, "rb") as pdf_file:
278
+ st.download_button(
279
+ label="Download report as PDF",
280
+ data=pdf_file,
281
+ file_name="report.pdf",
282
+ mime="application/pdf",
283
+ )
284
+ except Exception as e:
285
+ st.warning(f"Could not generate PDF: {e}")
286
+
287
+ # Final logs snapshot (separate from the live stream above)
288
+ st.markdown("### Agent Logs")
289
+ st.text_area(
290
+ "Logs will appear here during/after the research process:",
291
+ value=_clean_logs(st.session_state.get("logs", "")),
292
+ height=220,
293
+ key=f"logs_{uuid.uuid4()}",
294
+ )
295
 
296
+ # Hide default Streamlit footer & menu
297
+ st.markdown(
298
+ """
299
+ <style>
300
+ #MainMenu {visibility: hidden;}
301
+ footer {visibility: hidden;}
302
+ </style>
303
+ """,
304
+ unsafe_allow_html=True,
305
+ )