Spaces:

QuantumLearner
/

Space12

Sleeping

App Files Community

QuantumLearner committed on Aug 18, 2025

Commit

e252b93

verified ·

1 Parent(s): a89b0c7

Update app.py

Browse files

Files changed (1) hide show

app.py +96 -76

app.py CHANGED Viewed

@@ -6,48 +6,42 @@ import nest_asyncio
 import importlib.metadata
 import tempfile
 from datetime import datetime
-from contextlib import redirect_stdout
 import streamlit as st
 from fpdf import FPDF
 from gpt_researcher import GPTResearcher
-# ---------- sensible defaults to avoid KeyError in gpt_researcher ----------
 os.environ.setdefault("LLM_PROVIDER", "openai")
 os.environ.setdefault("EMBEDDING_PROVIDER", "openai")
 os.environ.setdefault("EMBEDDING_MODEL", "text-embedding-3-small")
-# you'll set RETRIEVER later to 'tavily'
-# ---------- version printing (optional) ----------
-def get_version(package_name, module=None):
     try:
-        if module and hasattr(module, '__version__'):
-            version = module.__version__
-        else:
-            version = importlib.metadata.version(package_name)
-        print(f"{package_name} version: {version}")
     except Exception:
         pass
-get_version('streamlit', st)
-get_version('gpt_researcher')
-get_version('nest_asyncio', nest_asyncio)
-get_version('fpdf')
-print("\nStandard Library Modules:")
-for lib in ['os','asyncio','contextlib','io','datetime','uuid','tempfile']:
-    print(f"{lib} is part of the Python Standard Library.")
-# ---------- streamlit setup ----------
 st.set_page_config(layout="wide")
 nest_asyncio.apply()
 openai_api_key = os.getenv("OPENAI_API_KEY")
 tavily_api_key = os.getenv("TAVILY_API_KEY")
 if not openai_api_key or not tavily_api_key:
     st.error("API keys for OpenAI or Tavily are not set in the environment variables.")
-# ---------- PDF helpers (in-memory, no filesystem writes) ----------
 class PDF(FPDF):
     def header(self):
         self.set_font("Arial", "B", 12)
@@ -63,34 +57,51 @@ def create_pdf_bytes(report_text: str) -> bytes:
     pdf.add_page()
     pdf.set_auto_page_break(auto=True, margin=15)
     pdf.set_font("Arial", size=12)
-    for line in report_text.split('\n'):
-        # keep compatibility with latin-1 fonts
-        pdf.multi_cell(0, 10, line.encode('latin-1', 'replace').decode('latin-1'))
-    # dest='S' returns a latin-1 str; encode to bytes for download
-    return pdf.output(dest='S').encode('latin-1')
-# ---------- async research ----------
-async def get_report(query: str, report_type: str, sources: list, report_source: str, doc_dir: str):
-    f = io.StringIO()
-    with redirect_stdout(f):
-        if report_source == 'local':
-            os.environ['DOC_PATH'] = doc_dir  # ensure gpt_researcher looks in /tmp/uploads
-            researcher = GPTResearcher(query=query, report_type=report_type, report_source='local')
-        else:
-            researcher = GPTResearcher(query=query, report_type=report_type, source_urls=sources)
-        await researcher.conduct_research()
-        # simple loop to let logs flush
-        for _ in range(30):
-            logs = f.getvalue()
-            if "Finalized research step" in logs:
-                break
             await asyncio.sleep(1)
         report = await researcher.write_report()
-    return report, f.getvalue()
 # ---------- UI ----------
 st.title("GPT Researcher")
@@ -103,7 +114,7 @@ with st.expander("Why Use GPT Researcher?", expanded=False):
 - **Objective and Unbiased**: Delivers accurate, factual information.
 - **Time-Efficient**: Reduces manual research time.
 - **Up-to-Date**: Minimizes outdated info and hallucinations.
-- **Comprehensive**: Can produce long, detailed reports (2,000+ words).
 - **Reduced Misinformation**: Considers multiple sources.
     """)
@@ -116,16 +127,11 @@ st.markdown(
     """,
     unsafe_allow_html=True,
 )
 st.markdown('<p class="big-green-font">Enter your research query:</p>', unsafe_allow_html=True)
 default_query = "Why is the Stock Price of Nvidia Soaring?"
 user_query = st.text_input("", default_query, help="Type your research question or topic.")
-if user_query:
-    current_date = datetime.now().strftime("%B %Y")
-    final_query = f"{user_query} Current Date is {current_date}"
-else:
-    final_query = None
 st.sidebar.title("Research Settings")
 with st.sidebar.expander("How to Use", expanded=False):
@@ -134,65 +140,79 @@ with st.sidebar.expander("How to Use", expanded=False):
 2. **Enter Research Query**.
 3. **Choose Report Type**.
 4. **Provide Sources or Upload Files**.
-5. **Run Research** and download the PDF.
     """)
 research_type = st.sidebar.selectbox("Select research type:", ["Web Research", "Document Research"])
 report_type = st.sidebar.selectbox("Select report type:", ["research_report", "resource_list", "article_outline"])
-# use a guaranteed-writable location
 UPLOAD_DIR = os.path.join(tempfile.gettempdir(), "uploads")
 os.makedirs(UPLOAD_DIR, exist_ok=True)
 if research_type == "Web Research":
     sources_input = st.sidebar.text_area("Enter your sources (optional, comma-separated URLs):")
-    sources = [u.strip() for u in sources_input.split(',') if u.strip()]
 else:
     uploaded_files = st.sidebar.file_uploader("Upload files for local research:", accept_multiple_files=True)
     sources = []
     if uploaded_files:
-        for uploaded_file in uploaded_files:
-            with open(os.path.join(UPLOAD_DIR, uploaded_file.name), "wb") as f:
-                f.write(uploaded_file.getbuffer())
 run_clicked = st.sidebar.button("Run Research")
 if run_clicked:
     if not final_query:
         st.warning("Please enter a research query.")
     else:
-        # set retriever
-        os.environ['RETRIEVER'] = 'tavily'
-        report_source = 'local' if research_type == "Document Research" else 'web'
         with st.spinner("Running research..."):
-            report, logs = asyncio.run(get_report(final_query, report_type, sources, report_source, UPLOAD_DIR))
             st.session_state.report = report
             st.session_state.logs = logs
-# ---------- outputs ----------
-if 'report' in st.session_state:
-    st.markdown("### Research Report")
-    st.markdown(st.session_state.report)
-    # in-memory PDF (no filesystem writes)
     pdf_bytes = create_pdf_bytes(st.session_state.report)
     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-    st.download_button(
         label="Download report as PDF",
         data=pdf_bytes,
         file_name=f"report_{timestamp}.pdf",
         mime="application/pdf",
     )
-st.markdown("### Agent Logs")
-st.text_area(
-    "Logs will appear here during the research process:",
-    value=st.session_state.get('logs', ''),
-    height=200,
-    key=f"logs_{uuid.uuid4()}",
-)
-# Hide Streamlit UI chrome
 st.markdown("""
 <style>
 #MainMenu {visibility: hidden;}

 import importlib.metadata
 import tempfile
 from datetime import datetime
+from contextlib import redirect_stdout, redirect_stderr
 import streamlit as st
 from fpdf import FPDF
 from gpt_researcher import GPTResearcher
+# ---------- sensible defaults for gpt_researcher env ----------
 os.environ.setdefault("LLM_PROVIDER", "openai")
 os.environ.setdefault("EMBEDDING_PROVIDER", "openai")
 os.environ.setdefault("EMBEDDING_MODEL", "text-embedding-3-small")
+# ---------- tiny version printer (optional) ----------
+def get_version(pkg, module=None):
     try:
+        v = getattr(module, "__version__", None) if module else None
+        v = v or importlib.metadata.version(pkg)
+        print(f"{pkg} version: {v}")
     except Exception:
         pass
+get_version("streamlit", st)
+get_version("gpt_researcher")
+get_version("nest_asyncio", nest_asyncio)
+get_version("fpdf")
+# ---------- streamlit base ----------
 st.set_page_config(layout="wide")
 nest_asyncio.apply()
+# API keys
 openai_api_key = os.getenv("OPENAI_API_KEY")
 tavily_api_key = os.getenv("TAVILY_API_KEY")
 if not openai_api_key or not tavily_api_key:
     st.error("API keys for OpenAI or Tavily are not set in the environment variables.")
+# ---------- PDF helpers (in-memory only) ----------
 class PDF(FPDF):
     def header(self):
         self.set_font("Arial", "B", 12)
     pdf.add_page()
     pdf.set_auto_page_break(auto=True, margin=15)
     pdf.set_font("Arial", size=12)
+    for line in report_text.split("\n"):
+        pdf.multi_cell(0, 10, line.encode("latin-1", "replace").decode("latin-1"))
+    # dest='S' returns str; encode for bytes
+    return pdf.output(dest="S").encode("latin-1")
+# ---------- live research with streaming logs ----------
+async def run_research_streaming(query: str, report_type: str, sources: list, report_source: str, doc_dir: str, logs_box):
+    buf = io.StringIO()
+    # Build researcher
+    if report_source == "local":
+        os.environ["DOC_PATH"] = doc_dir
+        researcher = GPTResearcher(query=query, report_type=report_type, report_source="local")
+    else:
+        researcher = GPTResearcher(query=query, report_type=report_type, source_urls=sources)
+    # Run and stream logs every second while the task is pending
+    with redirect_stdout(buf), redirect_stderr(buf):
+        task = asyncio.create_task(researcher.conduct_research())
+        while not task.done():
+            logs = buf.getvalue()
+            # update the single stable widget for logs
+            logs_box.text_area(
+                "Agent Logs (live)",
+                value=logs if logs else "Starting…",
+                height=240,
+                key="live_logs_box",
+            )
             await asyncio.sleep(1)
+        # ensure any final prints are captured
+        await task
+        final_logs = buf.getvalue()
+        logs_box.text_area(
+            "Agent Logs (live)",
+            value=final_logs,
+            height=240,
+            key="live_logs_box",
+        )
+        # Now write the report
         report = await researcher.write_report()
+    return report, final_logs
 # ---------- UI ----------
 st.title("GPT Researcher")
 - **Objective and Unbiased**: Delivers accurate, factual information.
 - **Time-Efficient**: Reduces manual research time.
 - **Up-to-Date**: Minimizes outdated info and hallucinations.
+- **Comprehensive**: Produces long, detailed reports (2,000+ words).
 - **Reduced Misinformation**: Considers multiple sources.
     """)
     """,
     unsafe_allow_html=True,
 )
 st.markdown('<p class="big-green-font">Enter your research query:</p>', unsafe_allow_html=True)
 default_query = "Why is the Stock Price of Nvidia Soaring?"
 user_query = st.text_input("", default_query, help="Type your research question or topic.")
+final_query = f"{user_query} Current Date is {datetime.now().strftime('%B %Y')}" if user_query else None
 st.sidebar.title("Research Settings")
 with st.sidebar.expander("How to Use", expanded=False):
 2. **Enter Research Query**.
 3. **Choose Report Type**.
 4. **Provide Sources or Upload Files**.
+5. **Run Research** — watch live logs and download the PDF.
     """)
 research_type = st.sidebar.selectbox("Select research type:", ["Web Research", "Document Research"])
 report_type = st.sidebar.selectbox("Select report type:", ["research_report", "resource_list", "article_outline"])
+# always-writable uploads dir
 UPLOAD_DIR = os.path.join(tempfile.gettempdir(), "uploads")
 os.makedirs(UPLOAD_DIR, exist_ok=True)
 if research_type == "Web Research":
     sources_input = st.sidebar.text_area("Enter your sources (optional, comma-separated URLs):")
+    sources = [u.strip() for u in sources_input.split(",") if u.strip()]
 else:
     uploaded_files = st.sidebar.file_uploader("Upload files for local research:", accept_multiple_files=True)
     sources = []
     if uploaded_files:
+        for f in uploaded_files:
+            with open(os.path.join(UPLOAD_DIR, f.name), "wb") as out:
+                out.write(f.getbuffer())
 run_clicked = st.sidebar.button("Run Research")
+# live logs placeholder (single stable widget)
+st.markdown("### Agent Logs")
+logs_placeholder = st.empty()
+# output placeholders
+report_placeholder = st.empty()
+download_placeholder = st.empty()
 if run_clicked:
     if not final_query:
         st.warning("Please enter a research query.")
     else:
+        os.environ["RETRIEVER"] = "tavily"
+        src = "local" if research_type == "Document Research" else "web"
         with st.spinner("Running research..."):
+            report, logs = asyncio.run(
+                run_research_streaming(
+                    final_query, report_type, sources, src, UPLOAD_DIR, logs_placeholder
+                )
+            )
+            # Save to session_state for persistence across reruns
             st.session_state.report = report
             st.session_state.logs = logs
+# Display results if we have them (e.g., after rerun)
+if "report" in st.session_state:
+    report_placeholder.markdown("### Research Report")
+    report_placeholder.markdown(st.session_state.report)
     pdf_bytes = create_pdf_bytes(st.session_state.report)
     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    download_placeholder.download_button(
         label="Download report as PDF",
         data=pdf_bytes,
         file_name=f"report_{timestamp}.pdf",
         mime="application/pdf",
+        key="dl_pdf_btn",
     )
+# If logs exist from a previous run, keep them visible
+if "logs" in st.session_state:
+    logs_placeholder.text_area(
+        "Agent Logs (live)",
+        value=st.session_state.logs,
+        height=240,
+        key="live_logs_box",
+    )
+# Hide Streamlit chrome
 st.markdown("""
 <style>
 #MainMenu {visibility: hidden;}