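"""Real-time Deep Research Agent (Tavily Edition).

A Streamlit app that gathers sources from Tavily web search, ArXiv, and
Semantic Scholar, synthesizes them into a report via an OpenRouter-hosted
LLM, checks the result for content overlap, and offers downloads.
"""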
import os
import datetime
from io import BytesIO
from urllib.parse import quote_plus

import requests
import streamlit as st
import feedparser
from dotenv import load_dotenv
from tavily import TavilyClient
from fuzzywuzzy import fuzz
from fpdf import FPDF
# --- Load Keys ---
load_dotenv()
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
# Both keys must come from the environment (or a .env file); never ship a
# hardcoded fallback key in source.
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
tavily = TavilyClient(api_key=TAVILY_API_KEY)
# --- Layout ---
st.set_page_config(page_title="Deep Research Bot", layout="wide")

with st.sidebar:
    st.title("Research Input")
    topic = st.text_input("What would you like me to research next?")
    report_type = st.selectbox("Type of report", [
        "Summary - Short and fast (~2 min)",
        "Detailed Report (~5 min)",
        "Thorough Academic Research (~10 min)",
    ])
    tone = st.selectbox("Tone of the report", [
        "Objective - Impartial and unbiased presentation of facts and findings",
        "Persuasive - Advocating a specific point of view",
        "Narrative - Storytelling tone for layperson readers",
    ])
    source_type = st.selectbox("Sources to include", [
        "Web Only", "Academic Only", "Hybrid",
    ])
    custom_domains = st.text_input("Query Domains (Optional)", placeholder="techcrunch.com, forbes.com")

st.title("Real-time Deep Research Agent (Tavily Edition)")
st.markdown("This assistant autonomously gathers, analyzes, and synthesizes research from multiple sources in real time using Tavily, ArXiv, and Semantic Scholar.")
# --- Helper Functions ---
def call_llm(messages, model="deepseek/deepseek-chat-v3-0324:free", max_tokens=2048, temperature=0.7):
    """Send a chat-completion request to OpenRouter and return the reply text."""
    url = "https://openrouter.ai/api/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "Content-Type": "application/json",
        "X-Title": "GPT Deep Research Agent",
    }
    data = {
        "model": model,
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature,
    }
    response = requests.post(url, headers=headers, json=data, timeout=120)
    # Inspect the status before trusting the body: error responses may not be JSON.
    if response.status_code != 200:
        try:
            message = response.json().get("error", {}).get("message", "LLM API error")
        except ValueError:
            message = f"LLM API error (HTTP {response.status_code})"
        raise RuntimeError(message)
    return response.json()["choices"][0]["message"]["content"]
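# Usage sketch (assumes OPENROUTER_API_KEY is set and the default model is
# currently offered by OpenRouter):
#   answer = call_llm([{"role": "user", "content": "Summarize RLHF in two sentences."}])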
def get_sources(topic, domains=None):
    """Search the web via Tavily, optionally restricted to specific domains."""
    kwargs = {"query": topic, "search_depth": "advanced", "max_results": 10}
    if domains:
        # Tavily supports domain filtering natively, which is more reliable
        # than embedding "site:" operators in the query string.
        kwargs["include_domains"] = [d.strip() for d in domains.split(",") if d.strip()]
    response = tavily.search(**kwargs)
    return [{
        "title": item.get("title"),
        "url": item.get("url"),
        "snippet": item.get("content", ""),
    } for item in response.get("results", [])]
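# Each returned entry has the shape:
#   {"title": ..., "url": ..., "snippet": <page content extracted by Tavily>}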
def get_arxiv_papers(query):
    """Fetch up to five matching papers from the ArXiv Atom API."""
    url = f"http://export.arxiv.org/api/query?search_query=all:{quote_plus(query)}&start=0&max_results=5"
    feed = feedparser.parse(url)
    return [{
        "title": e.title,
        "summary": e.summary.replace("\n", " ").strip(),
        # Entries expose several links; pick the PDF one if present.
        "url": next((l.href for l in e.links if l.get("type") == "application/pdf"), ""),
    } for e in feed.entries]
def get_semantic_papers(query):
    """Fetch up to five matching papers from the Semantic Scholar Graph API."""
    url = "https://api.semanticscholar.org/graph/v1/paper/search"
    params = {"query": query, "limit": 5, "fields": "title,abstract,url"}
    response = requests.get(url, params=params, timeout=30)
    papers = response.json().get("data", [])
    return [{
        "title": p.get("title"),
        # The abstract field is often present but null, so `or` is needed
        # in addition to the .get() default.
        "summary": p.get("abstract") or "No abstract available",
        "url": p.get("url"),
    } for p in papers]
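# Note: this endpoint works without an API key but is rate-limited; for
# heavier use, request a key from Semantic Scholar and send it in the
# "x-api-key" request header.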
def generate_apa_citation(title, url, source):
    year = datetime.datetime.now().year
    label = {
        "arxiv": "*arXiv*", "semantic": "*Semantic Scholar*", "web": "*Web Source*"
    }.get(source, "*Web*")
    return f"{title}. ({year}). {label}. {url}"
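# Illustrative output (the year is always the current year, since the
# scraped metadata rarely includes a publication date):
#   "Attention Is All You Need. (<current year>). *arXiv*. https://arxiv.org/abs/1706.03762"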
def check_plagiarism(text, topic):
    """Flag gathered sources whose text heavily overlaps the generated report."""
    hits = []
    for r in get_sources(topic):
        similarity = fuzz.token_set_ratio(text, r["snippet"])
        if similarity >= 75:
            hits.append(r)
    return hits
def remove_duplicates(entries):
    """Drop entries whose titles fuzzy-match an already-kept title."""
    unique = []
    titles = []
    for e in entries:
        if all(fuzz.token_set_ratio(e["title"], t) < 85 for t in titles):
            titles.append(e["title"])
            unique.append(e)
    return unique
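# fuzz.token_set_ratio ignores word order and repeated words, so titles like
# "GPT-4 Technical Report" and "Technical Report: GPT-4" score ~100 and the
# later entry is dropped at the 85 threshold.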
def generate_image_from_topic(topic):
    # Unsplash Source returns a random photo matching the query keywords.
    # Note: Unsplash has deprecated this endpoint and it may stop resolving;
    # swap in another image service if requests fail.
    return f"https://source.unsplash.com/featured/?{quote_plus(topic)}"
def generate_pdf(text):
    pdf = FPDF()
    pdf.add_page()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.set_font("Arial", size=12)
    for line in text.split("\n"):
        # The built-in core fonts are Latin-1 only; replace characters the
        # encoder cannot represent so unicode in the LLM output cannot crash
        # PDF generation.
        pdf.multi_cell(0, 10, line.encode("latin-1", "replace").decode("latin-1"))
    # Assumes the maintained fpdf2 package, whose output() returns the
    # document as a bytearray when no file name is given.
    return BytesIO(pdf.output())
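# Usage sketch: the returned BytesIO can be handed straight to
# st.download_button, which is exactly how the report download below uses it.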
# --- Execution ---
if st.button("Research"):
    if not topic.strip():
        st.warning("Please enter a research topic first.")
        st.stop()
    try:
        with st.spinner("Gathering relevant research..."):
            all_entries = []
            citations = []
            if source_type in ["Web Only", "Hybrid"]:
                web_data = remove_duplicates(get_sources(topic, custom_domains))
                for w in web_data:
                    all_entries.append({
                        "title": w["title"],
                        "summary": w["snippet"],
                        "url": w["url"],
                        "source": "web",
                    })
                    citations.append(generate_apa_citation(w["title"], w["url"], "web"))
            if source_type in ["Academic Only", "Hybrid"]:
                academic_data = remove_duplicates(get_arxiv_papers(topic) + get_semantic_papers(topic))
                for a in academic_data:
                    source = "arxiv" if "arxiv" in (a["url"] or "") else "semantic"
                    all_entries.append({
                        "title": a["title"],
                        "summary": a["summary"],
                        "url": a["url"],
                        "source": source,
                    })
                    citations.append(generate_apa_citation(a["title"], a["url"], source))
        st.success("Data collected and filtered!")

        with st.spinner("Writing final research report..."):
            sources_text = ""
            for e in all_entries:
                sources_text += f"- [{e['title']}]({e['url']})\n> {(e['summary'] or '')[:300]}...\n\n"
            prompt = f"""
# Research Task: {topic}
Tone: {tone}
Report Type: {report_type}
Sources:
{sources_text}
Now, synthesize:
1. Research questions and gap
2. A novel insight or direction
3. A real-world application scenario
4. A {report_type.lower()} in paragraph format (use bullet points only if a paragraph runs too long).
Use larger headings for sections and slightly smaller ones for sub-sections. Do not use markdown or HTML, just plain text.
"""
            output = call_llm([{"role": "user", "content": prompt}], max_tokens=3500)
        st.header("Research Report")
        st.write(output)

        st.subheader("APA Citations")
        for c in citations:
            st.markdown(f"- {c}")

        with st.spinner("Checking for overlaps..."):
            overlaps = check_plagiarism(output, topic)
        if overlaps:
            st.warning("Potential content overlap found.")
            for h in overlaps:
                st.markdown(f"**{h['title']}** - [{h['url']}]({h['url']})")
        else:
            st.success("No major overlaps detected.")

        if report_type.startswith("Thorough"):
            st.subheader("Related Visual")
            image_url = generate_image_from_topic(topic)
            st.image(image_url, caption=f"Visual related to: {topic}", use_container_width=True)

        st.subheader("Download Options")
        pdf_file = generate_pdf(output)
        st.download_button("Download PDF", data=pdf_file, file_name=f"{topic}_report.pdf", mime="application/pdf")
        st.download_button("Download Plain Text", data=output, file_name=f"{topic}_report.txt", mime="text/plain")
    except Exception as e:
        st.error(f"Error: {e}")
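# To launch locally (assuming this file is saved as app.py and the two API
# keys are exported or placed in a .env file):
#   streamlit run app.py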