Spaces:

QuantumLearner
/

Space12

Sleeping

App Files Files Community

QuantumLearner committed on Aug 18, 2025

Commit

2c6c229

verified ·

1 Parent(s): c443f96

Update app.py

Browse files

Files changed (1) hide show

app.py +162 -168

app.py CHANGED Viewed

@@ -1,27 +1,43 @@
-# --- set a writable doc path BEFORE importing gpt_researcher ---
-import os as _os
-_os.environ.setdefault("DOC_PATH", "/app/uploads")  # or "/tmp/my-docs"
-_os.makedirs(_os.environ["DOC_PATH"], exist_ok=True)
-# ---------------------------------------------------------------
-import io
-import uuid
-import asyncio
-from datetime import datetime
-from contextlib import redirect_stdout
 import streamlit as st
 import nest_asyncio
 from fpdf import FPDF
-from gpt_researcher import GPTResearcher
-# Streamlit page config
-st.set_page_config(layout="wide", page_title="GPT Researcher")
-# Allow asyncio.run in Streamlit
 nest_asyncio.apply()
-# -------- PDF helper --------
 class PDF(FPDF):
     def header(self):
         self.set_font("Arial", "B", 12)
@@ -33,193 +49,171 @@ class PDF(FPDF):
         self.cell(0, 10, f"Page {self.page_no()}", 0, 0, "C")
 def create_pdf(report_text: str) -> str:
-    """Write PDF to a unique temp path and return the path."""
-    pdf_path = f"/tmp/research_report_{uuid.uuid4().hex}.pdf"
     pdf = PDF()
     pdf.add_page()
     pdf.set_auto_page_break(auto=True, margin=15)
     pdf.set_font("Arial", size=12)
-    for line in report_text.split("\n"):
-        # FPDF is latin-1: degrade gracefully
-        pdf.multi_cell(0, 10, line.encode("latin-1", "replace").decode("latin-1"))
-    pdf.output(pdf_path, "F")
-    return pdf_path
-# -------- live research runner --------
-async def run_research_streaming(query: str, report_type: str, report_source: str, sources: list, logs_placeholder):
-    """
-    Run research and stream stdout to logs_placeholder.
-    Returns (report_text, final_logs).
-    """
-    buf = io.StringIO()
-    with redirect_stdout(buf):
-        if report_source == "local":
-            # ensure DOC_PATH exists (already set before import, but keep it safe)
-            os.makedirs(os.environ["DOC_PATH"], exist_ok=True)
-            researcher = GPTResearcher(query=query, report_type=report_type, report_source="local")
         else:
             researcher = GPTResearcher(query=query, report_type=report_type, source_urls=sources)
-        # Start research so we can poll logs
-        task = asyncio.create_task(researcher.conduct_research())
-        # Stream logs while running
-        while not task.done():
-            await asyncio.sleep(0.5)
-            logs = buf.getvalue() or "Starting…"
-            logs_placeholder.text_area(
-                "Agent Logs (live)",
-                value=logs,
-                height=220,
-                key=f"live_logs_{uuid.uuid4()}",
-            )
-        # Propagate exceptions if any
-        await task
-        # Final logs refresh
-        logs_placeholder.text_area(
-            "Agent Logs (live)",
-            value=buf.getvalue() or "Finalizing…",
-            height=220,
-            key=f"live_logs_final_{uuid.uuid4()}",
-        )
-        # Write the report
-        report_text = await researcher.write_report()
-    final_logs = buf.getvalue()
-    return report_text, final_logs
-# ---------------- UI ----------------
 st.title("GPT Researcher")
 st.markdown(
     """
-GPT Researcher is an autonomous agent for web/doc research that produces a detailed, factual report.
-"""
-)
-with st.expander("Why Use GPT Researcher?", expanded=False):
-    st.markdown(
-        """
-- **Objective & Factual**
-- **Time-Efficient**
-- **Up-to-Date** (web or uploaded docs)
-- **Long-Form Reports** (2,000+ words possible)
-"""
-    )
-# Input label with accessibility (hide visually but not empty)
-user_query = st.text_input(
-    "Research query",
-    "Why is the Stock Price of Nvidia Soaring?",
-    help="Type your research question or topic.",
-    label_visibility="collapsed",
 )
-current_date = datetime.now().strftime("%B %Y")
-final_query = f"{user_query} Current Date is {current_date}" if user_query else ""
-st.sidebar.title("Research Settings")
-research_type = st.sidebar.selectbox(
-    "Select research type:",
-    ["Web Research", "Document Research"],
-    help="Choose web-based research or research from local documents.",
-)
-report_type = st.sidebar.selectbox(
-    "Select report type:",
-    ["research_report", "resource_list", "article_outline"],
-    help="Choose the format of the final report.",
-)
-# Sources / uploads
-sources = []
-if research_type == "Web Research":
-    sources_input = st.sidebar.text_area(
-        "Enter your sources (optional, comma-separated URLs):",
-        help="Provide a list of URLs separated by commas.",
-    )
-    if sources_input:
-        sources = [u.strip() for u in sources_input.split(",") if u.strip()]
 else:
-    uploaded_files = st.sidebar.file_uploader(
-        "Upload files for local research:",
-        accept_multiple_files=True,
-        help=f"Files are saved to {os.environ['DOC_PATH']}",
-    )
-    if uploaded_files:
-        for up in uploaded_files:
-            fp = os.path.join(os.environ["DOC_PATH"], up.name)
-            with open(fp, "wb") as f:
-                f.write(up.getbuffer())
-# Keys check (optional UI hint)
-if not os.getenv("OPENAI_API_KEY") or not os.getenv("TAVILY_API_KEY"):
-    st.error("OPENAI_API_KEY or TAVILY_API_KEY is not set in environment variables.")
-run_clicked = st.sidebar.button("Run Research", type="primary")
-if run_clicked:
-    if not user_query:
-        st.warning("Please enter a research query.")
     else:
-        # Use Tavily retriever (what the original app did)
-        os.environ["RETRIEVER"] = "tavily"
-        report_source = "local" if research_type == "Document Research" else "web"
-        st.subheader("Agent Logs")
-        live_logs_placeholder = st.empty()
-        with st.spinner("Running research…"):
-            report_text, final_logs = asyncio.run(
-                run_research_streaming(
-                    query=final_query,
-                    report_type=report_type,
-                    report_source=report_source,
-                    sources=sources,
-                    logs_placeholder=live_logs_placeholder,
-                )
-            )
-        st.session_state["report"] = report_text
-        st.session_state["logs"] = final_logs
-# ------------- Results -------------
-if "report" in st.session_state:
     st.markdown("### Research Report")
-    st.markdown(st.session_state["report"])
-    # Create & offer PDF
     try:
-        pdf_path = create_pdf(st.session_state["report"])
         with open(pdf_path, "rb") as pdf_file:
             st.download_button(
                 label="Download report as PDF",
                 data=pdf_file,
                 file_name="report.pdf",
-                mime="application/pdf",
             )
     except Exception as e:
         st.warning(f"Could not generate PDF: {e}")
-st.markdown("### Agent Logs (final)")
-st.text_area(
-    "Logs snapshot after run:",
-    value=st.session_state.get("logs", ""),
-    height=220,
-    key=f"logs_snapshot_{uuid.uuid4()}",
-)
-# Hide Streamlit footer & menu
-st.markdown(
-    """
-    <style>
-    #MainMenu {visibility: hidden;}
-    footer {visibility: hidden;}
-    </style>
-    """,
-    unsafe_allow_html=True,
-)

 import streamlit as st
+import os
+from gpt_researcher import GPTResearcher
+import asyncio
 import nest_asyncio
+from contextlib import redirect_stdout
+import io
 from fpdf import FPDF
+from datetime import datetime
+import uuid
+import importlib.metadata
+# ---------- version helpers (as in your original) ----------
+def get_version(package_name, module=None):
+    try:
+        if module and hasattr(module, '__version__'):
+            version = module.__version__
+        else:
+            version = importlib.metadata.version(package_name)
+        print(f"{package_name} version: {version}")
+    except AttributeError:
+        print(f"{package_name} does not have a __version__ attribute.")
+    except importlib.metadata.PackageNotFoundError:
+        print(f"{package_name} is not installed.")
+import asyncio as _asyncio
+get_version('streamlit', st)
+get_version('gpt_researcher')
+get_version('nest_asyncio', nest_asyncio)
+get_version('fpdf')
+print("\nStandard Library Modules:")
+for lib in ['os', 'asyncio', 'contextlib', 'io', 'datetime', 'uuid']:
+    print(f"{lib} is part of the Python Standard Library and does not have a separate version number.")
+# ---------- Streamlit / asyncio setup ----------
+st.set_page_config(layout="wide")
 nest_asyncio.apply()
+# ---------- PDF helper (same logic, safe path) ----------
 class PDF(FPDF):
     def header(self):
         self.set_font("Arial", "B", 12)
         self.cell(0, 10, f"Page {self.page_no()}", 0, 0, "C")
 def create_pdf(report_text: str) -> str:
+    """Create the PDF and return a unique file path under /tmp."""
+    os.makedirs("/tmp", exist_ok=True)
+    pdf_path = f"/tmp/report_{uuid.uuid4().hex}.pdf"
     pdf = PDF()
     pdf.add_page()
     pdf.set_auto_page_break(auto=True, margin=15)
     pdf.set_font("Arial", size=12)
+    for line in report_text.split('\n'):
+        # FPDF is latin-1; degrade gracefully
+        pdf.multi_cell(0, 10, line.encode('latin-1', 'replace').decode('latin-1'))
+    pdf.output(pdf_path, 'F')
+    return pdf_path
+# ---------- async run (unchanged behavior: capture stdout, return at end) ----------
+async def get_report(query: str, report_type: str, sources: list, report_source: str):
+    f = io.StringIO()
+    unique_key = str(uuid.uuid4())  # kept from your original (not strictly used)
+    with redirect_stdout(f):
+        if report_source == 'local':
+            # Use a guaranteed-writable dir in containers
+            os.environ['DOC_PATH'] = '/tmp/uploads'
+            os.makedirs(os.environ['DOC_PATH'], exist_ok=True)
+            researcher = GPTResearcher(query=query, report_type=report_type, report_source=report_source)
         else:
             researcher = GPTResearcher(query=query, report_type=report_type, source_urls=sources)
+        await researcher.conduct_research()
+        # Your original polling to avoid infinite loop
+        max_attempts = 30
+        attempts = 0
+        while attempts < max_attempts:
+            logs = f.getvalue()
+            if "Finalized research step" in logs:
+                break
+            await asyncio.sleep(1)
+            attempts += 1
+        report = await researcher.write_report()
+    return report, f.getvalue()
+# ---------- UI (same layout/wording as your original) ----------
 st.title("GPT Researcher")
+st.markdown("""
+GPT Researcher is an autonomous agent designed for comprehensive online research tasks.
+It pulls information from the web or uploaded documents to create detailed, factual, research reports.
+""")
+with st.expander("Why Use GPT Researcher?", expanded=False):
+    st.markdown("""
+- **Objective and Unbiased**: GPT Researcher focuses on delivering accurate and factual information without bias.
+- **Time-Efficient**: It significantly reduces the time required for manual research tasks.
+- **Up-to-Date Information**: Unlike traditional LLMs, GPT Researcher avoids outdated information and minimizes the risk of hallucinations.
+- **Comprehensive Reports**: Capable of producing long, detailed research reports (2,000+ words).
+- **Reduced Misinformation**: By considering a wide range of sources, it minimizes the risks associated with limited or biased information.
+For more details, visit the [GPT Researcher GitHub repository](https://github.com/assafelovic/gpt-researcher).
+""")
+# Keep your style tweaks
 st.markdown(
     """
+    <style>
+    .big-green-font { font-size:20px !important; font-weight: bold; color: green; margin-bottom: -10px; }
+    .stTextInput > div > input { margin-top: -25px; }
+    </style>
+    """,
+    unsafe_allow_html=True,
 )
+st.markdown('<p class="big-green-font">Enter your research query:</p>', unsafe_allow_html=True)
+default_query = "Why is the Stock Price of Nvidia Soaring?"
+# Same input — just hide label for accessibility warning
+user_query = st.text_input("", default_query, help="Type your research question or topic.", label_visibility="collapsed")
+if user_query:
+    current_date = datetime.now().strftime("%B %Y")
+    final_query = f"{user_query} Current Date is {current_date}"
 else:
+    final_query = ""
+st.sidebar.title("Research Settings")
+with st.sidebar.expander("How to Use", expanded=False):
+    st.markdown("""
+### How to Use
+1. **Select Research Type**: Choose between Web Research and Document Research.
+2. **Enter Research Query**: Type in your research question or topic.
+3. **Choose Report Type**: Select the format of the report you want (research report, resource list, or article outline).
+4. **Provide Sources or Upload Files**: For Web Research, you can enter URLs. For Document Research, upload the necessary files.
+5. **Run Research**: Click the "Run Research" button to start. The logs will update after the run, and the final report will be displayed and available for download as a PDF.
+""")
+with st.sidebar:
+    research_type = st.selectbox("Select research type:", ["Web Research", "Document Research"], help="Choose between web-based research or research from local documents.")
+    report_type = st.selectbox("Select report type:", ["research_report", "resource_list", "article_outline"], help="Choose the format of the final report.")
+    if research_type == "Web Research":
+        sources_input = st.text_area("Enter your sources (optional, comma-separated URLs):", help="Provide a list of URLs to use as sources, separated by commas.")
+        sources = [url.strip() for url in sources_input.split(',') if url.strip()]
     else:
+        uploaded_files = st.file_uploader("Upload files for local research:", accept_multiple_files=True, help="Upload documents for the research.")
+        sources = []
+        if uploaded_files:
+            # Save uploads to the writable /tmp/uploads
+            os.makedirs("/tmp/uploads", exist_ok=True)
+            for uploaded_file in uploaded_files:
+                file_path = os.path.join("/tmp/uploads", uploaded_file.name)
+                with open(file_path, "wb") as f:
+                    f.write(uploaded_file.getbuffer())
+    if st.button("Run Research"):
+        if not user_query:
+            st.warning("Please enter a research query.")
+        else:
+            # Same as your original
+            os.environ['RETRIEVER'] = 'tavily'
+            report_source = 'local' if research_type == "Document Research" else 'web'
+            with st.spinner("Running research..."):
+                report, logs = asyncio.run(get_report(final_query, report_type, sources, report_source))
+                st.session_state.report = report
+                st.session_state.logs = logs
+# ---------- Results (report + PDF + final logs, same as your original) ----------
+if 'report' in st.session_state:
     st.markdown("### Research Report")
+    st.markdown(st.session_state.report)
+    # Create PDF safely under /tmp and offer download
     try:
+        pdf_path = create_pdf(st.session_state.report)
         with open(pdf_path, "rb") as pdf_file:
             st.download_button(
                 label="Download report as PDF",
                 data=pdf_file,
                 file_name="report.pdf",
+                mime="application/pdf"
             )
     except Exception as e:
         st.warning(f"Could not generate PDF: {e}")
+st.markdown("### Agent Logs")
+if 'logs' in st.session_state:
+    st.text_area(
+        "Logs will appear here during the research process:",
+        value=st.session_state.logs,
+        height=200,
+        key=f"logs_{uuid.uuid4()}"  # same trick you had originally
+    )
+else:
+    st.text_area(
+        "Logs will appear here during the research process",
+        height=200,
+        key=f"logs_{uuid.uuid4()}"
+    )
+# Hide Streamlit's default footer and menu
+hide_streamlit_style = """
+<style>
+#MainMenu {visibility: hidden;}
+footer {visibility: hidden;}
+</style>
+"""
+st.markdown(hide_streamlit_style, unsafe_allow_html=True)