Spaces:
Sleeping
Sleeping
File size: 20,535 Bytes
f138bf1 2d473c3 a77e234 83d8666 33e1f31 ed10cd6 2b97b69 bd2e62c 2b04ac1 1ef3c64 0557f2f 1a96a2f f138bf1 08418b2 151e525 a77e234 05644a0 2b04ac1 2d473c3 08418b2 ed10cd6 08418b2 ed10cd6 b963654 08418b2 05ea5c0 05644a0 08418b2 05644a0 83d8666 05644a0 83d8666 2b211fe 83d8666 fe78503 33e1f31 2b211fe 08418b2 1ef3c64 bcae0c1 fdfeeef bcae0c1 1ef3c64 bcae0c1 b963654 bcae0c1 afadb32 bcae0c1 1ef3c64 b963654 bcae0c1 05644a0 bcae0c1 afadb32 bcae0c1 afadb32 bcae0c1 afadb32 bcae0c1 afadb32 bcae0c1 6d2158f afadb32 b963654 ed10cd6 2cafe26 ed10cd6 08418b2 1a96a2f 2cafe26 ed10cd6 bcae0c1 1a96a2f bcae0c1 1a96a2f 08418b2 1a96a2f 04b8d03 1a96a2f f33b3b6 1a96a2f f33b3b6 1ef3c64 1a96a2f b963654 9d58b9f b963654 1ef3c64 afadb32 b963654 1ef3c64 ed10cd6 05ea5c0 1ef3c64 05ea5c0 bce88e7 b9713bd d262e7c b963654 1a96a2f 05ea5c0 1ef3c64 ed10cd6 05ea5c0 b963654 b9713bd 2caa6ce b9713bd ed10cd6 05ea5c0 b963654 ed10cd6 05ea5c0 b963654 ed10cd6 05ea5c0 b963654 ed10cd6 05ea5c0 d262e7c ed10cd6 05ea5c0 a6a0096 1a96a2f 1ef3c64 05ea5c0 1ef3c64 05ea5c0 68d14e0 05ea5c0 6407974 ed10cd6 5db3787 b963654 2a354bb 1a96a2f 05ea5c0 c8eaa55 1a96a2f ed10cd6 1a96a2f ed10cd6 1a96a2f ed10cd6 093bc19 2a354bb 093bc19 2a354bb 093bc19 2a354bb 093bc19 2a354bb 5db3787 377f206 5db3787 377f206 c8eaa55 2a354bb 5db3787 2a354bb 093bc19 5db3787 2a354bb 093bc19 5db3787 093bc19 5db3787 2a354bb 5db3787 e299c31 c8eaa55 e299c31 881216e e299c31 575ef18 881216e e299c31 881216e 575ef18 c8eaa55 e299c31 08418b2 c8eaa55 5db3787 c8eaa55 2a354bb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 |
import os
import streamlit as st
import requests
import datetime
import time
import json
import uuid
from dotenv import load_dotenv
from tavily import TavilyClient
import feedparser
from fuzzywuzzy import fuzz
from fpdf import FPDF
from duckduckgo_search import DDGS
from io import BytesIO
# Load environment variables
# load_dotenv() runs first so that the os.getenv() lookups below can see any
# values it adds to the process environment.
load_dotenv()
# os.getenv returns None for a variable that is not set.
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
# Module-level Tavily client, used by get_sources().
tavily = TavilyClient(api_key=TAVILY_API_KEY)
# Initialize session state
# Seed every key the rest of the script reads. Keys that already exist in the
# session are left untouched.
for _state_key, _initial_value in (
    ("memory_bank", []),
    ("chat_threads", {}),
    ("current_thread_id", None),
    ("last_report", ""),
    ("methodology_notes", ""),
    ("chat_history", []),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
# Session data functions
def save_session_data():
    """Write the memory bank and chat threads to ``session_memory.json``.

    Only ``memory_bank`` and ``chat_threads`` are persisted; the file is
    overwritten on every call.
    """
    snapshot = {
        "memory_bank": st.session_state.memory_bank,
        "chat_threads": st.session_state.chat_threads,
    }
    with open("session_memory.json", "w", encoding="utf-8") as handle:
        handle.write(json.dumps(snapshot, ensure_ascii=False, indent=4))
def load_session_data():
    """Restore the memory bank and chat threads from ``session_memory.json``.

    A missing file is the normal first-run case and is ignored. An unreadable,
    corrupt, or non-object file is logged and skipped, leaving the current
    session state as it is, so one bad save cannot stop the app from starting.
    """
    import logging

    try:
        with open("session_memory.json", "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        return
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        # Logged rather than shown with st.warning because this runs before
        # st.set_page_config() in the script.
        logging.getLogger(__name__).warning(
            "Ignoring unreadable session_memory.json: %s", exc
        )
        return
    if not isinstance(data, dict):
        logging.getLogger(__name__).warning(
            "Ignoring session_memory.json: top-level JSON value is not an object"
        )
        return
    st.session_state.memory_bank = data.get("memory_bank", [])
    st.session_state.chat_threads = data.get("chat_threads", {})
# Runs at module level, i.e. each time the script executes; when the file
# exists, its contents replace memory_bank and chat_threads in session state.
load_session_data()
# LLM call
def call_llm(messages, model="deepseek/deepseek-chat-v3-0324:free", max_tokens=4000, temperature=0.7, timeout=(10, 120)):
    """Stream a chat completion from OpenRouter, yielding content tokens.

    Args:
        messages: List of ``{"role": ..., "content": ...}`` chat messages.
        model: OpenRouter model identifier.
        max_tokens: Upper bound on generated tokens sent in the request.
        temperature: Sampling temperature sent in the request.
        timeout: ``requests`` timeout, either a number or a
            ``(connect, read)`` tuple, so a stalled connection cannot hang
            the app forever.

    Yields:
        Non-empty ``content`` strings from each streamed delta, in order.

    Raises:
        requests.HTTPError: If the API answers with an error status (bad key,
            rate limit, ...). Previously this produced a silently empty stream.
        requests.RequestException: On connection problems or timeouts.
    """
    url = "https://openrouter.ai/api/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": model,
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature,
        "stream": True,
    }
    sse_prefix = "data: "
    with requests.post(url, headers=headers, json=payload, stream=True, timeout=timeout) as response:
        response.raise_for_status()
        for line in response.iter_lines():
            if not line:
                continue
            decoded = line.decode("utf-8")
            if not decoded.startswith(sse_prefix):
                continue
            # Strip only the leading prefix: str.replace would also delete
            # "data: " from inside the generated text itself.
            piece = decoded[len(sse_prefix):].strip()
            if piece == "[DONE]":
                break
            try:
                parsed = json.loads(piece)
            except json.JSONDecodeError:
                # Ignore non-JSON lines in the stream.
                continue
            # "choices" may be missing or empty on some chunks.
            choices = parsed.get("choices") or [{}]
            delta = choices[0].get("delta") or {}
            token = delta.get("content") or ""
            if token:
                yield token
# --- Source Gathering Functions ---
def get_image_urls(query, max_images=6):
    """Return the ``"image"`` URL of each DuckDuckGo image hit for ``query``.

    ``max_images`` is passed to the search as ``max_results``.
    """
    urls = []
    with DDGS() as ddgs:
        for hit in ddgs.images(query, max_results=max_images):
            urls.append(hit["image"])
    return urls
def get_sources(topic, domains=None):
    """Search the web via Tavily and return normalized source records.

    Args:
        topic: Search query text.
        domains: Optional comma-separated domain list; each non-empty entry
            is added to the query as a ``site:`` filter.

    Returns:
        A list of dicts with keys ``title``, ``url``, ``snippet``,
        ``image_url``, ``source`` (always ``"web"``) and ``year`` (first year
        found in the snippet, or 9999).
    """
    query = topic
    if domains:
        domain_filters = [d.strip() for d in domains.split(",") if d.strip()]
        # Input such as "," or " " yields no filters; appending a bare
        # " site:" would corrupt the query.
        if domain_filters:
            query += " site:" + " OR site:".join(domain_filters)
    response = tavily.search(query=query, search_depth="advanced", max_results=10)
    results = []
    for r in response.get("results", []):
        # Normalize a missing or null content field to an empty string.
        content = r.get("content") or ""
        image_url = r.get("image_url")
        if not image_url:
            # Best-effort thumbnail lookup; a failure must not drop the source.
            try:
                images = get_image_urls(r["title"], max_images=1)
                image_url = images[0] if images else None
            except Exception:
                image_url = None
        results.append({
            "title": r["title"],
            "url": r["url"],
            "snippet": content,
            "image_url": image_url,
            "source": "web",
            "year": extract_year_from_text(content),
        })
    return results
def get_arxiv_papers(query):
    """Query the arXiv export API and return up to five paper records.

    Each record has ``title``, ``summary`` (newlines flattened to spaces),
    ``url`` (first PDF link, or ``""``), ``source`` (``"arxiv"``) and
    ``year`` (from the published date, or 9999 when absent).
    """
    from urllib.parse import quote_plus

    feed_url = f"http://export.arxiv.org/api/query?search_query=all:{quote_plus(query)}&start=0&max_results=5"
    parsed_feed = feedparser.parse(feed_url)
    papers = []
    for entry in parsed_feed.entries:
        title = entry.title
        summary = entry.summary.replace("\n", " ").strip()
        # First link advertised as a PDF, if any.
        pdf_url = ""
        for link in entry.links:
            if link.type == "application/pdf":
                pdf_url = link.href
                break
        if 'published' in entry:
            year = int(entry.published[:4])
        else:
            year = 9999
        papers.append({
            "title": title,
            "summary": summary,
            "url": pdf_url,
            "source": "arxiv",
            "year": year,
        })
    return papers
def get_semantic_papers(query):
    """Search Semantic Scholar and return up to five paper records.

    Returns a list of dicts with ``title``, ``summary``, ``url``, ``source``
    (``"semantic"``) and ``year``. Network or decoding failures yield an
    empty list so the rest of the research run can continue.
    """
    url = "https://api.semanticscholar.org/graph/v1/paper/search"
    params = {"query": query, "limit": 5, "fields": "title,abstract,url,year"}
    try:
        response = requests.get(url, params=params, timeout=15)
        papers = response.json().get("data") or []
    except (requests.RequestException, ValueError, AttributeError):
        # ValueError: body is not JSON; AttributeError: JSON is not an object.
        return []
    return [{
        "title": p.get("title"),
        # dict.get's default only applies to a missing key; a null value
        # would come through as None and break later slicing and sorting.
        "summary": p.get("abstract") or "No abstract available",
        "url": p.get("url"),
        "source": "semantic",
        "year": p.get("year") or 9999,
    } for p in papers]
def extract_year_from_text(text):
    """Return the first four-digit year (1900-2099) found in ``text``.

    Returns 9999 when ``text`` is empty/None or contains no such year.
    """
    import re

    if not text:
        return 9999
    # Non-capturing group: with a capturing group, findall returned only the
    # "19"/"20" prefix, so the function produced 19 or 20 instead of the year.
    match = re.search(r"\b(?:19|20)\d{2}\b", text)
    return int(match.group(0)) if match else 9999
def merge_duplicates(entries):
    """Drop entries whose title closely matches an earlier kept entry.

    An entry is kept only if its ``fuzz.token_set_ratio`` against every
    previously kept title is below 90. The first occurrence wins and input
    order is preserved.
    """
    kept = []
    kept_titles = []
    for candidate in entries:
        title = candidate['title']
        is_duplicate = any(
            not fuzz.token_set_ratio(title, earlier) < 90
            for earlier in kept_titles
        )
        if is_duplicate:
            continue
        kept.append(candidate)
        kept_titles.append(title)
    return kept
def sort_sources_chronologically(sources):
    """Return ``sources`` sorted by ascending ``year``.

    Sources whose year is missing or not an integer (e.g. ``None`` from an
    API) sort last, as if their year were 9999, instead of raising a
    ``TypeError`` during comparison. The sort is stable.
    """
    def _year_key(source):
        year = source.get("year", 9999)
        return year if isinstance(year, int) else 9999

    return sorted(sources, key=_year_key)
def build_chronological_progression(sources):
    """Build a Markdown timeline of source titles grouped by year.

    Sources with an unknown year (9999, missing, or not an integer) are left
    out. Years appear in ascending order, each as a bold heading followed by
    one ``- title`` bullet per source.

    Returns:
        The timeline text, or ``""`` when no source has a usable year.
    """
    timeline = {}
    for s in sources:
        year = s.get("year", 9999)
        # A None year used to pass the `!= 9999` check and then break
        # sorted() on mixed None/int keys.
        if not isinstance(year, int) or year == 9999:
            continue
        timeline.setdefault(year, []).append(f"- {s['title']}")
    sections = []
    for year in sorted(timeline):
        entries = "\n".join(timeline[year])
        sections.append(f"**{year}**\n{entries}")
    return "\n\n".join(sections)
def download_threads_as_pdf(chat_threads):
    """Render all chat threads into a PDF and return it as a ``BytesIO``.

    Args:
        chat_threads: Mapping of thread id to a list of
            ``{"role": ..., "content": ...}`` messages.

    Returns:
        A ``BytesIO`` positioned at offset 0 containing the PDF bytes.
    """
    pdf = FPDF()
    pdf.add_page()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.set_font("Arial", size=12)
    for tid, chats in chat_threads.items():
        pdf.cell(0, 10, f"Thread {tid[:8]}", ln=True)
        for msg in chats:
            role = "You" if msg["role"] == "user" else "Assistant"
            text = f"{role}: {msg['content']}"
            # The text is forced into latin-1; anything outside it becomes "?".
            text = text.encode('latin-1', 'replace').decode('latin-1')
            pdf.multi_cell(0, 10, text)
        pdf.ln(5)
    raw = pdf.output(dest='S')
    # Some FPDF versions return a str here and others bytes/bytearray;
    # calling .encode() on bytes would raise, so handle both.
    pdf_bytes = raw.encode('latin-1') if isinstance(raw, str) else bytes(raw)
    return BytesIO(pdf_bytes)
# --- Streamlit UI Start ---
st.set_page_config(page_title="π§ Deep Research Assistant 4.0", layout="centered")
# --- Sidebar ---
# All inputs for a research run are collected here; the values (topic,
# report_type, tone, source_type, custom_domains, research_button) are read
# by the sections further down the script.
with st.sidebar:
    st.markdown("## π Start New Research")
    topic = st.text_input("π§ Topic")
    report_type = st.selectbox("π Report Type", ["Summary", "Detailed Report", "Thorough Academic Research"])
    tone = st.selectbox("π― Tone", ["Objective", "Persuasive", "Narrative"])
    source_type = st.selectbox("π Sources", ["Web Only", "Academic Only", "Hybrid"])
    custom_domains = st.text_input("π Optional Domains", placeholder="forbes.com, mit.edu")
    research_button = st.button("π Run Deep Research", use_container_width=True)
# Main page header.
st.title("π Deep Research Assistant 4.0")
st.markdown("Where serious research meets serious style. π§ π")
st.divider()
# --- Web Images Section ---
# Shown only when the research button was pressed with a non-empty topic.
if topic and research_button:
    st.subheader("πΌ Related Images from the Web")
    try:
        topic_images = get_image_urls(topic, max_images=6)
        if topic_images:
            # Lay the images out round-robin across three columns.
            img_cols = st.columns(3)
            for idx, img_url in enumerate(topic_images):
                with img_cols[idx % 3]:
                    st.image(img_url, use_container_width=True)
        else:
            st.info("No images found for this topic.")
    except Exception as e:
        # Image lookup is optional; report the failure and continue.
        st.warning(f"Couldn't load topic images. ({e})")
# --- Main Research Section ---
# Gathers sources, builds the report prompt, streams the report into the
# page, and records it in session state and on disk.
if research_button and topic:
    try:
        with st.status("π Gathering sources..."):
            all_sources = []
            if source_type in ["Web Only", "Hybrid"]:
                all_sources += get_sources(topic, custom_domains) if custom_domains.strip() else get_sources(topic)
            if source_type in ["Academic Only", "Hybrid"]:
                all_sources += get_arxiv_papers(topic)
                all_sources += get_semantic_papers(topic)
            if not all_sources:
                raise ValueError("β No sources found.")
            merged = merge_duplicates(all_sources)
            merged = sort_sources_chronologically(merged)
        chronological_progress = build_chronological_progression(merged)
        # Feed the five most recent reports back in as prior context.
        previous_learnings = "\n\n".join(st.session_state.memory_bank[-5:])
        citations = [f"- {s['title']} ({s['year']}) [{s['source']}]({s['url']})" for s in merged]
        source_lines = []
        for s in merged:
            # Web results carry "snippet", papers carry "summary"; either may
            # be missing or None, which previously crashed the [:300] slice.
            excerpt = (s.get('snippet') or s.get('summary') or '')[:300]
            source_lines.append(f"- [{s['title']}]({s['url']}) ({s['year']})\n> {excerpt}...")
        sources_text = "\n".join(source_lines)
        length_instruction = {
            "Summary": "Keep it concise, 700 words.",
            "Detailed Report": "Write 1500+ words with critical insights.",
            "Thorough Academic Research": "Craft a full academic paper >10000 words."
        }[report_type]
        # Create New Thread
        thread_id = str(uuid.uuid4())
        st.session_state.current_thread_id = thread_id
        st.session_state.chat_threads[thread_id] = []
        prompt = f"""
Use past learnings:
{previous_learnings}
π Use the following structure:
You are tasked with generating an academic-style research progression report based on a set of provided sources. Follow these steps carefully to ensure clarity, depth, and adherence to academic writing standards:
1. Chronological Mapping
Objective: Outline the research development over time, clearly presenting the progression of ideas.
For each paper/source, provide:
Publication year and proper citation (IEEE format).
Summary of the novelty: What new idea, method, or finding did the paper contribute?
Methods used: Summarize the key methodologies, frameworks, models, algorithms, experimental setups, or theoretical approaches.
Identified limitations: Explicitly mention the limitations, weaknesses, or open challenges acknowledged by the authors or identifiable from the paper.
Progression Mapping:
Describe how each subsequent paper attempted to overcome or address the limitations of previous works.
Highlight evolution of methods: e.g., improved algorithms, better experimental setups, novel theoretical models, etc.
2. Gap Identification
Objective: Identify unresolved issues or underexplored areas by analyzing the chronological mapping.
Process:
Based on the limitations and methods described, point out:
Aspects that have not been fully optimized.
Research questions that remain unanswered.
Potential for interdisciplinary approaches not yet considered.
Methodological, technological, or theoretical shortcomings.
Clearly list and explain the major gaps in a bullet-point or paragraph format.
3. Novel Contribution Proposal
Objective: Suggest a new research direction or idea that logically builds upon the identified gaps.
Proposal should include:
Novel Research Topic: Clear title or theme for the proposed research.
Experimental Design:
Describe the proposed experimental framework.
Define the datasets, tools, or systems to be used.
Mention control/variable considerations if applicable.
Statistical Design:
Specify the statistical tests or models planned for data analysis.
Ensure experimental reproducibility (sample size, power analysis, etc.).
Mention any validation techniques (cross-validation, bootstrapping, etc.).
Justify why your proposal addresses the identified gap effectively.
4. Formatted Report Structure
Final Output should follow proper IEEE academic formatting, including:
Title (for the report).
Abstract (summary of the full report).
Keywords (3β6 keywords relevant to the topic).
Introduction (background, motivation, and purpose).
Chronological Mapping (main section with subheadings by year or topic).
Gap Identification.
Proposed Novel Contribution.
Conclusion.
References (properly formatted in IEEE citation style).
Additional Notes:
Use formal academic language throughout.
Ensure logical flow between sections.
Highlight key terms or methods where appropriate (e.g., using italics or bold).
Be comprehensive but concise β avoid unnecessary repetition.
Maintain clarity and focus on contribution and novelty.
New Topic:
{topic}
Writing:
{tone} tone, {length_instruction}
Timeline:
{chronological_progress}
Sources:
{sources_text}
Citations:
{chr(10).join(citations)}
"""
        # --- Generate Report ---
        st.subheader(f"π {report_type} on '{topic}'")
        output_placeholder = st.empty()
        final_output = ""
        for chunk in call_llm([{"role": "user", "content": prompt}]):
            final_output += chunk
            # NOTE(review): unsafe_allow_html renders model output built from
            # web content as raw HTML; confirm this is intended.
            output_placeholder.markdown(final_output, unsafe_allow_html=True)
        # build_full_context() reads last_report; it was initialized but never
        # written, so the report was missing from follow-up context.
        st.session_state.last_report = final_output
        st.session_state.memory_bank.append(final_output)
        st.session_state.chat_threads[thread_id].append({"role": "assistant", "content": final_output})
        save_session_data()
    except Exception as e:
        st.error(f"β Error: {e}")
# --- Build Full Context (Research + Thread + Methodology) ---
def build_full_context():
    """Assemble the text context used by the methodology and follow-up prompts.

    Concatenates, in order and only when present: the last research report,
    every message of the current thread (prefixed "User"/"Assistant"), and
    the stored methodology notes. Returns ``""`` when none are available.
    """
    parts = []

    # Research report
    if st.session_state.get("last_report"):
        parts.append(f"=== Research Report ===\n{st.session_state['last_report']}\n\n")

    # Messages of the active thread
    if st.session_state.get("current_thread_id"):
        active_messages = st.session_state.chat_threads.get(st.session_state.current_thread_id, [])
        for message in active_messages:
            speaker = "Assistant"
            if message["role"] == "user":
                speaker = "User"
            parts.append(f"{speaker}: {message['content']}\n\n")

    # Methodology notes, if any were generated
    if st.session_state.get("methodology_notes"):
        parts.append(f"=== Methodology Suggestions ===\n{st.session_state['methodology_notes']}\n\n")

    return "".join(parts)
# --- Chat Threads Section ---
# One expander per saved thread, showing its messages and a follow-up box
# that continues the conversation within that thread.
st.divider()
st.subheader("π Your Research Threads")
for tid, chats in st.session_state.chat_threads.items():
    with st.expander(f"π§΅ Thread {tid[:8]}", expanded=False):
        for msg in chats:
            with st.chat_message(msg["role"] if msg["role"] in ["user", "assistant"] else "assistant"):
                st.markdown(msg["content"])
        followup = st.text_input(f"π¬ Continue Thread {tid[:8]}:", key=f"followup_{tid}")
        if st.button(f"Ask Follow-up {tid}", key=f"button_{tid}"):
            if followup:
                answered = False
                # Guard the network call like the other LLM sections do, so a
                # failed request shows an error instead of a traceback.
                try:
                    with st.spinner("π€ Assistant is typing..."):
                        response = ""
                        for chunk in call_llm(st.session_state.chat_threads[tid] + [{"role": "user", "content": followup}], max_tokens=2000):
                            response += chunk
                    if response:
                        st.session_state.chat_threads[tid].append({"role": "user", "content": followup})
                        st.session_state.chat_threads[tid].append({"role": "assistant", "content": response})
                        save_session_data()
                        answered = True
                    else:
                        st.warning("The assistant returned an empty response. Please try again.")
                except Exception as e:
                    st.error(f"Follow-up error: {e}")
                # Kept outside the try block so the rerun cannot be caught by
                # the handler above. Only rerun when a new exchange was saved.
                if answered:
                    st.rerun()
# --- Download All Threads Section ---
# The PDF is built on every script run in which at least one thread exists,
# because the download button needs its data up front.
if st.session_state.chat_threads:
    st.divider()
    st.subheader("π₯ Export Your Work")
    pdf_file = download_threads_as_pdf(st.session_state.chat_threads)
    st.download_button("π₯ Download All Threads as PDF", data=pdf_file, file_name="Research_Threads.pdf", mime="application/pdf", use_container_width=True)
# --- Methodology Recommender ---
# Asks the LLM for a methodology based on build_full_context() and stores the
# answer in session state under "methodology_notes".
st.divider()
st.subheader("π§ͺ Methodology Recommender")
if st.button("π§ Suggest Research Methodologies"):
    context = build_full_context()
    if context:
        try:
            method_prompt = [
                {"role": "system", "content": "You are a research advisor."},
                {"role": "user", "content": f"""Given the following conversation, research report, and context, suggest a very detailed and customized research methodology that matches the research objectives discussed.
\"\"\"{context}\"\"\""""}
            ]
            method_output = ""
            method_box = st.empty()
            # Re-render the growing answer in place as chunks arrive.
            for chunk in call_llm(method_prompt):
                method_output += chunk
                method_box.markdown(method_output, unsafe_allow_html=True)
            st.session_state["methodology_notes"] = method_output
        except Exception as e:
            st.error(f"β Methodology suggestion failed: {e}")
    else:
        st.warning("β οΈ No research context available. Please generate research first.")
# --- Follow-up Q&A (Contextual to Full Thread) ---
# Answers a free-form question against build_full_context(). The exchange is
# appended to chat_history (session only), not to the thread itself.
st.divider()
st.subheader("π¬ Follow-up Q&A")
# NOTE: rebinds `followup`, which the thread loop earlier in the script also
# assigns.
followup = st.text_input("Ask a follow-up question:", key="follow_up_input")
if st.button("Ask Follow-up"):
    context = build_full_context()
    if followup and context:
        try:
            combined_prompt = [
                {"role": "system", "content": "You are an expert academic research assistant."},
                {"role": "user", "content": f"""Use ONLY the following research report, conversation, and methodology suggestions to answer the follow-up question below. Stay fully topic-specific and context-aware.
\"\"\"{context}\"\"\"
Follow-up Question: {followup}
"""}
            ]
            response = ""
            with st.chat_message("assistant"):
                # The answer is collected fully, then rendered once.
                for chunk in call_llm(combined_prompt, max_tokens=2000):
                    response += chunk
                st.markdown(response)
            st.session_state.chat_history.append({"role": "user", "content": followup})
            st.session_state.chat_history.append({"role": "assistant", "content": response})
        except Exception as e:
            st.error(f"β Follow-up error: {e}")
    else:
        # Reached when the question is empty or when no context exists.
        st.warning("β οΈ No sufficient context available. Please generate research first.")
# --- Paper Upload for Review & Improvement ---
# Extracts text from an uploaded PDF/TXT file and streams LLM feedback on it.
st.divider()
st.subheader("π€ Upload Your Paper for Feedback")
uploaded_file = st.file_uploader("Upload your research paper (.pdf or .txt)", type=["pdf", "txt"])
if uploaded_file and st.button("π§ Analyze Paper for Improvements"):
    try:
        def extract_text_from_file(file):
            """Return the text of an uploaded .pdf or .txt file ("" otherwise)."""
            # Compare case-insensitively so "PAPER.PDF" is handled too.
            filename = file.name.lower()
            if filename.endswith(".pdf"):
                from PyPDF2 import PdfReader
                reader = PdfReader(file)
                # Extract each page once; the original ran the extraction
                # twice per page (once to filter, once to keep).
                page_texts = (page.extract_text() for page in reader.pages)
                return "\n".join(text for text in page_texts if text)
            elif filename.endswith(".txt"):
                return file.read().decode("utf-8")
            return ""
        paper_text = extract_text_from_file(uploaded_file)
        if not paper_text or len(paper_text.strip()) < 100:
            st.warning("β οΈ The uploaded paper seems empty or too short to analyze.")
        else:
            feedback_prompt = [
                {"role": "system", "content": "You are an expert academic advisor."},
                {"role": "user", "content": f"""I have written the following research paper. Please analyze it and provide detailed suggestions on:
- Areas where the paper is weak or unclear
- How to improve the novelty or originality
- Structural improvements or better ways to present arguments
Be honest and constructive. Here's the full text:
\"\"\"{paper_text}\"\"\""""}
            ]
            with st.status("π Analyzing your paper..."):
                improvement_output = ""
                feedback_box = st.empty()
                # Re-render the growing feedback in place as chunks arrive.
                for chunk in call_llm(feedback_prompt, max_tokens=2500):
                    improvement_output += chunk
                    feedback_box.markdown(improvement_output, unsafe_allow_html=True)
    except Exception as e:
        st.error(f"β Error while analyzing paper: {e}")
# --- Full Chat History Viewer ---
# Read-only list of the Follow-up Q&A exchanges stored in chat_history.
st.divider()
st.subheader("π Full Chat History")
with st.expander("View Chat History", expanded=False):
    for msg in st.session_state.chat_history:
        # Any role other than user/assistant is displayed as the assistant.
        with st.chat_message(msg["role"] if msg["role"] in ["user", "assistant"] else "assistant"):
            st.markdown(msg["content"])