Spaces:

Ani14
/

AutoReasearcher

Sleeping

App Files Community

Ani14 committed on Apr 21, 2025

Commit

3e7ca50

verified ·

1 Parent(s): 7d54951

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -144

app.py CHANGED Viewed

@@ -1,149 +1,36 @@
-import os
 import streamlit as st
-import requests
-import datetime
-import openai
-import feedparser
-from dotenv import load_dotenv
-from tavily import TavilyClient
-from PyPDF2 import PdfReader
-import faiss
-import numpy as np
-# --- Load API Keys ---
-load_dotenv()
-openai.api_key = os.getenv("OPENAI_API_KEY")
-TAVILY_API_KEY = os.getenv("TAVILY_API_KEY", "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")
-tavily = TavilyClient(api_key=TAVILY_API_KEY)
-# --- Streamlit Config ---
-st.set_page_config(page_title="GPT Researcher Agent", layout="wide")
-st.title("📚 GPT-Powered Research Assistant")
-# --- Helper: APA Citation ---
-def generate_apa_citation(title, url, source):
-    year = datetime.datetime.now().year
-    label = {
-        "arxiv": "*arXiv*", "semantic": "*Semantic Scholar*", "web": "*Web*"
-    }.get(source, "*Web*")
-    return f"{title}. ({year}). {label}. {url}"
-# --- Search Tools ---
-def tavily_search(query):
-    results = tavily.search(query, search_depth="advanced", max_results=5)
-    return results.get("results", [])
-def arxiv_search(query):
-    from urllib.parse import quote_plus
-    url = f"http://export.arxiv.org/api/query?search_query=all:{quote_plus(query)}&start=0&max_results=3"
-    feed = feedparser.parse(url)
-    return [{
-        "title": e.title,
-        "summary": e.summary.replace("\n", " ").strip(),
-        "url": next((l.href for l in e.links if l.type == "application/pdf"), "")
-    } for e in feed.entries]
-# --- Document Embedding ---
-def embed_document(file):
-    doc_text = ""
-    if file.name.endswith(".pdf"):
-        reader = PdfReader(file)
-        for page in reader.pages:
-            text = page.extract_text()
-            if text:
-                doc_text += text
-    else:
-        doc_text = file.read().decode("utf-8")
-    chunks = [doc_text[i:i+1000] for i in range(0, len(doc_text), 1000)]
-    embeddings = openai.Embedding.create(input=chunks, model="text-embedding-ada-002")
-    vectors = [np.array(rec["embedding"], dtype=np.float32) for rec in embeddings["data"]]
-    dim = len(vectors[0])
-    index = faiss.IndexFlatL2(dim)
-    index.add(np.vstack(vectors))
-    return chunks, index
-# --- Streaming GPT Call ---
-def stream_response(messages):
-    response = openai.ChatCompletion.create(
-        model="gpt-4",
-        messages=messages,
-        max_tokens=3000,
-        stream=True
-    )
-    collected = ""
-    placeholder = st.empty()
-    for chunk in response:
-        delta = chunk["choices"][0].get("delta", {})
-        if "content" in delta:
-            token = delta["content"]
-            collected += token
-            placeholder.markdown(collected + "▌")
-    placeholder.markdown(collected)
-    return collected
-# --- Sidebar Input ---
 with st.sidebar:
-    topic = st.text_input("🔍 Research Topic", "AI in Sustainable Agriculture")
-    report_type = st.selectbox("📄 Report Type", ["Summary", "Detailed", "Academic Paper"])
-    tone = st.selectbox("🎯 Tone", ["Objective", "Scientific", "Persuasive"])
-    sources = st.selectbox("🌐 Sources", ["Web", "Documents", "Both"])
-    uploaded_file = st.file_uploader("📎 Upload Document (PDF/TXT)", type=["pdf", "txt"])
-    start_button = st.button("🚀 Run Research")
-# --- Main Agent Execution ---
-if start_button and topic:
-    st.subheader("🧠 Agent Log")
-    with st.container():
-        st.markdown("<div style='max-height:300px; overflow-y:auto; background:#222; padding:10px; border-radius:10px;'>", unsafe_allow_html=True)
-        st.markdown("🧭 Starting research task...")
-        st.markdown(f"🔎 Topic: **{topic}** | Tone: _{tone}_ | Type: _{report_type}_")
-        st.markdown("</div>", unsafe_allow_html=True)
-    citations = []
-    context = ""
-    if sources in ["Web", "Both"]:
-        st.info("🌐 Searching web sources via Tavily...")
-        web_results = tavily_search(topic)
-        for r in web_results:
-            context += f"{r.get('content','')}
-"
-            citations.append(generate_apa_citation(r.get("title", "Untitled"), r.get("url", "#"), "web"))
-    if sources in ["Documents", "Both"] and uploaded_file:
-        st.info("📄 Embedding and retrieving from uploaded document...")
-        chunks, index = embed_document(uploaded_file)
-        q_embed = openai.Embedding.create(input=[topic], model="text-embedding-ada-002")
-        q_vector = np.array(q_embed["data"][0]["embedding"], dtype=np.float32).reshape(1, -1)
-        D, I = index.search(q_vector, k=3)
-        for idx in I[0]:
-            context += chunks[idx] + "
-"
-        citations.append(generate_apa_citation(uploaded_file.name, "Uploaded", "local"))
-    st.info("✍️ Generating final research report...")
-    messages = [
-        {"role": "system", "content": f"You are a research assistant. Write a {report_type.lower()} in a {tone.lower()} tone, citing sources."},
-        {"role": "user", "content": f"Topic: {topic}
-Context:
-{context}
-Write a complete report in academic markdown format."}
-    ]
-    final_output = stream_response(messages)
-    # --- Show Output and Citations ---
-    st.subheader("📄 Final Report")
-    st.markdown(final_output, unsafe_allow_html=True)
-    st.subheader("📚 References")
-    for cite in citations:
-        st.markdown(f"- {cite}")
-    st.download_button("💾 Download Markdown", final_output, file_name="report.md", mime="text/markdown")

 import streamlit as st
+from gpt_researcher.agent import GPTResearcher
+st.set_page_config(page_title="GPT Researcher UI", layout="wide")
+st.title("🤖 GPT Researcher — Streamlit UI")
+# --- Sidebar inputs ---
 with st.sidebar:
+    st.header("🧠 Research Configuration")
+    topic = st.text_input("💡 Research Topic", "AI in climate change")
+    report_type = st.selectbox("📄 Report Type", ["summary", "detailed", "academic"])
+    report_format = st.selectbox("📜 Format", ["markdown", "text"])
+    report_source = st.selectbox("🌐 Sources", ["web", "arxiv", "semantic-scholar", "hybrid"])
+    tone = st.selectbox("🎯 Tone", ["objective", "persuasive", "informative"])
+    start = st.button("🚀 Start Research")
+# --- Run GPTResearcher ---
+if start and topic:
+    st.markdown("### ⏳ Running Autonomous Research Agent...")
+    with st.spinner("Gathering knowledge, synthesizing insights..."):
+        agent = GPTResearcher(
+            query=topic,
+            report_type=report_type,
+            report_format=report_format,
+            report_source=report_source,
+            tone=tone
+        )
+        output = agent.run()
+    st.success("✅ Research Complete!")
+    st.markdown("### 📄 Final Report")
+    st.markdown(output, unsafe_allow_html=True)
+    st.download_button("💾 Download Markdown", output, file_name="report.md", mime="text/markdown")