Spaces:

Ani14
/

AutoReasearcher

Sleeping

App Files Files Community

Ani14 committed on Apr 27, 2025

Commit

05ea5c0

verified ·

1 Parent(s): ce5edf3

Update app.py

Browse files

Files changed (1) hide show

app.py +66 -58

app.py CHANGED Viewed

@@ -12,6 +12,7 @@ from PIL import Image
 from io import BytesIO
 from fpdf import FPDF
 import base64
 from duckduckgo_search import DDGS
 # --- Load API Keys ---
@@ -22,7 +23,7 @@ tavily = TavilyClient(api_key=TAVILY_API_KEY)
 # --- Helper Functions ---
-def call_llm(messages, model="deepseek/deepseek-chat-v3-0324:free", max_tokens=20000, temperature=0.7):
     url = "https://openrouter.ai/api/v1/chat/completions"
     headers = {
         "Authorization": f"Bearer {OPENROUTER_API_KEY}",
@@ -114,14 +115,6 @@ def extract_year_from_text(text):
     years = re.findall(r"\b(19|20)\d{2}\b", text)
     return int(years[0]) if years else 9999
-def generate_apa_citation(title, url, source, year=None):
-    title = title or "Untitled Source"
-    url = url or "Unavailable URL"
-    year = year if isinstance(year, int) and 1900 <= year <= datetime.datetime.now().year else "n.d."
-    source_map = {"arxiv": "*arXiv*", "semantic": "*Semantic Scholar*", "web": "*Web Source*"}
-    label = source_map.get(source, "*External Source*")
-    return f"{title}. ({year}). {label}. {url}"
 def merge_duplicates(entries):
     unique = []
     seen_titles = []
@@ -148,26 +141,34 @@ def build_chronological_progression(sources):
         summary += f"**{year}**\n{entries}\n\n"
     return summary.strip()
-# --- Streamlit UI ---
-st.set_page_config(page_title="🧠 Deep Research Assistant", layout="centered")
-if "last_report" not in st.session_state:
-    st.session_state["last_report"] = ""
 with st.sidebar:
-    st.title("🧠 Deep Research Assistant")
     topic = st.text_input("🔍 Enter your research topic")
     report_type = st.selectbox("📄 Report Type", ["Summary", "Detailed Report", "Thorough Academic Research"])
     tone = st.selectbox("🎯 Tone", ["Objective", "Persuasive", "Narrative"])
     source_type = st.selectbox("📚 Sources", ["Web Only", "Academic Only", "Hybrid"])
-    custom_domains = st.text_input("🌐 Optional Web Domains", placeholder="example.com, forbes.com")
-    research_button = st.button("🚀 Run Deep Research")
 st.title("📘 Research Output")
 if research_button and topic:
     try:
-        with st.status("🔍 Gathering and analyzing sources..."):
             all_sources = []
             if source_type in ["Web Only", "Hybrid"]:
                 all_sources += get_sources(topic, custom_domains) if custom_domains.strip() else get_sources(topic)
@@ -182,67 +183,74 @@ if research_button and topic:
             merged = sort_sources_chronologically(merged)
             chronological_progress = build_chronological_progression(merged)
-        # --- Source Image Previews ---
-        st.subheader("🖼 Source Previews")
-        image_shown = False
-        cols = st.columns(2)
-        for i, m in enumerate(merged):
-            if m.get("image_url"):
-                with cols[i % 2]:
-                    st.image(m["image_url"], caption=m["title"], use_container_width=True)
-                    image_shown = True
-        if not image_shown:
-            st.info("ℹ️ No image previews available.")
-        # --- Building Final Prompt ---
-        citations = [generate_apa_citation(m.get('title'), m.get('url'), m.get('source'), m.get('year')) for m in merged]
-        combined_text = "\n\n".join([
-            f"- [{m['title']}]({m['url']}) ({m.get('year', 'n.d.')})\n> {m.get('snippet', m.get('summary', ''))[:300]}..."
-            for m in merged
         ])
         length_instruction = {
-            "Summary": "Keep it short and focused under 500 words.",
-            "Detailed Report": "Around 1000+ words with gaps and suggestions.",
-            "Thorough Academic Research": "Over 5000+ words, full academic LIKE A RESEARCH PAPER FORMAT OF IEEE"
         }[report_type]
         prompt = f"""
-        You are an expert research assistant.
-        Your task is to write a {report_type.lower()} in a {tone.lower()} tone.
-        🔍 Use the following structure:
-        1. Chronological Mapping: based on provided timeline. you should describe the advancements in the topic, what experiments were conducted and what methods were used and how the progression has been made
-        2. Gap Identification: what is missing.
-        3. Novel Contribution: suggest improvements.-- Suggest a Novel topic based on the gaps and provide an overview on how a researcher can benefit from the topic
-        4. Structured Report: headings, clarity- with all headings and discussions like a proper research paper
-        ✂️ Writing Instruction:
-        {length_instruction}
-        📜 Research Progress Over Time:
-        {chronological_progress}
-        📚 Sources:
-        {combined_text}
-        📎 APA Citations:
-        {chr(10).join(citations)}
-        """
-        # --- Generate Final Output ---
         st.subheader(f"📝 {report_type} on '{topic}'")
         output_placeholder = st.empty()
-        full_output = ""
         for chunk in call_llm([{"role": "user", "content": prompt}]):
-            full_output += chunk
-            output_placeholder.markdown(full_output, unsafe_allow_html=True)
-        st.session_state["last_report"] = full_output
     except Exception as e:
         st.error(f"❌ Error: {e}")
 # 🧠 Initialize session state
 if "last_report" not in st.session_state:

 from io import BytesIO
 from fpdf import FPDF
 import base64
+import uuid
 from duckduckgo_search import DDGS
 # --- Load API Keys ---
 # --- Helper Functions ---
+def call_llm(messages, model="deepseek/deepseek-chat-v3-0324:free", max_tokens=4000, temperature=0.7):
     url = "https://openrouter.ai/api/v1/chat/completions"
     headers = {
         "Authorization": f"Bearer {OPENROUTER_API_KEY}",
     years = re.findall(r"\b(19|20)\d{2}\b", text)
     return int(years[0]) if years else 9999
 def merge_duplicates(entries):
     unique = []
     seen_titles = []
         summary += f"**{year}**\n{entries}\n\n"
     return summary.strip()
+# --- Initialize Streamlit Session ---
+st.set_page_config(page_title="🧠 Deep Research Assistant 2.0", layout="centered")
+if "memory_bank" not in st.session_state:
+    st.session_state.memory_bank = []
+if "chat_threads" not in st.session_state:
+    st.session_state.chat_threads = {}
+if "current_thread_id" not in st.session_state:
+    st.session_state.current_thread_id = None
+# --- Sidebar Inputs ---
 with st.sidebar:
+    st.title("Deep Research Assistant 2.0 🚀")
     topic = st.text_input("🔍 Enter your research topic")
     report_type = st.selectbox("📄 Report Type", ["Summary", "Detailed Report", "Thorough Academic Research"])
     tone = st.selectbox("🎯 Tone", ["Objective", "Persuasive", "Narrative"])
     source_type = st.selectbox("📚 Sources", ["Web Only", "Academic Only", "Hybrid"])
+    custom_domains = st.text_input("🌐 Optional Domains", placeholder="example.com, nature.com")
+    research_button = st.button("🚀 Start Research")
+# --- Main Logic ---
 st.title("📘 Research Output")
 if research_button and topic:
     try:
+        with st.status("🔎 Gathering sources..."):
             all_sources = []
             if source_type in ["Web Only", "Hybrid"]:
                 all_sources += get_sources(topic, custom_domains) if custom_domains.strip() else get_sources(topic)
             merged = sort_sources_chronologically(merged)
             chronological_progress = build_chronological_progression(merged)
+        # Prepare previous learnings
+        previous_learnings = "\n\n".join(st.session_state.memory_bank[-5:])  # last 5 learnings
+        citations = [f"- {s['title']} ({s['year']}) [{s['source']}]({s['url']})" for s in merged]
+        sources_text = "\n".join([
+            f"- [{s['title']}]({s['url']}) ({s['year']})\n> {s.get('snippet', s.get('summary', ''))[:300]}..."
+            for s in merged
         ])
         length_instruction = {
+            "Summary": "Keep it concise, under 300 words.",
+            "Detailed Report": "Write 500-700 words with critical insights.",
+            "Thorough Academic Research": "Craft a full academic paper >1000 words."
         }[report_type]
+        # Create Thread ID
+        thread_id = str(uuid.uuid4())
+        st.session_state.current_thread_id = thread_id
+        st.session_state.chat_threads[thread_id] = []
+        # --- LLM Prompt ---
         prompt = f"""
+You are an expert research assistant.
+🔵 Past Knowledge:
+{previous_learnings}
+🔵 New Research Topic:
+{topic}
+🔵 Writing Style:
+{tone} tone, {length_instruction}
+🔵 Research Timeline:
+{chronological_progress}
+🔵 Sources:
+{sources_text}
+🔵 Citations:
+{chr(10).join(citations)}
+"""
+        # --- Generate Report ---
         st.subheader(f"📝 {report_type} on '{topic}'")
         output_placeholder = st.empty()
+        final_output = ""
         for chunk in call_llm([{"role": "user", "content": prompt}]):
+            final_output += chunk
+            output_placeholder.markdown(final_output, unsafe_allow_html=True)
+        st.session_state.memory_bank.append(final_output)
+        st.session_state.chat_threads[thread_id].append({"role": "assistant", "content": final_output})
     except Exception as e:
         st.error(f"❌ Error: {e}")
+# --- Show Chat Threads ---
+st.divider()
+st.subheader("📂 Your Research Threads")
+for tid, chats in st.session_state.chat_threads.items():
+    with st.expander(f"Thread {tid[:8]}"):
+        for msg in chats:
+            role = "🧑 You" if msg['role'] == 'user' else "🤖 Assistant"
+            st.markdown(f"**{role}:** {msg['content']}")
 # 🧠 Initialize session state
 if "last_report" not in st.session_state: