Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -12,6 +12,7 @@ from PIL import Image
|
|
| 12 |
from io import BytesIO
|
| 13 |
from fpdf import FPDF
|
| 14 |
import base64
|
|
|
|
| 15 |
from duckduckgo_search import DDGS
|
| 16 |
|
| 17 |
# --- Load API Keys ---
|
|
@@ -22,7 +23,7 @@ tavily = TavilyClient(api_key=TAVILY_API_KEY)
|
|
| 22 |
|
| 23 |
# --- Helper Functions ---
|
| 24 |
|
| 25 |
-
def call_llm(messages, model="deepseek/deepseek-chat-v3-0324:free", max_tokens=
|
| 26 |
url = "https://openrouter.ai/api/v1/chat/completions"
|
| 27 |
headers = {
|
| 28 |
"Authorization": f"Bearer {OPENROUTER_API_KEY}",
|
|
@@ -114,14 +115,6 @@ def extract_year_from_text(text):
|
|
| 114 |
years = re.findall(r"\b(19|20)\d{2}\b", text)
|
| 115 |
return int(years[0]) if years else 9999
|
| 116 |
|
| 117 |
-
def generate_apa_citation(title, url, source, year=None):
|
| 118 |
-
title = title or "Untitled Source"
|
| 119 |
-
url = url or "Unavailable URL"
|
| 120 |
-
year = year if isinstance(year, int) and 1900 <= year <= datetime.datetime.now().year else "n.d."
|
| 121 |
-
source_map = {"arxiv": "*arXiv*", "semantic": "*Semantic Scholar*", "web": "*Web Source*"}
|
| 122 |
-
label = source_map.get(source, "*External Source*")
|
| 123 |
-
return f"{title}. ({year}). {label}. {url}"
|
| 124 |
-
|
| 125 |
def merge_duplicates(entries):
|
| 126 |
unique = []
|
| 127 |
seen_titles = []
|
|
@@ -148,26 +141,34 @@ def build_chronological_progression(sources):
|
|
| 148 |
summary += f"**{year}**\n{entries}\n\n"
|
| 149 |
return summary.strip()
|
| 150 |
|
| 151 |
-
# --- Streamlit
|
| 152 |
-
st.set_page_config(page_title="🧠 Deep Research Assistant", layout="centered")
|
| 153 |
|
| 154 |
-
if "
|
| 155 |
-
st.session_state
|
| 156 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
with st.sidebar:
|
| 158 |
-
st.title("
|
| 159 |
topic = st.text_input("π Enter your research topic")
|
| 160 |
report_type = st.selectbox("π Report Type", ["Summary", "Detailed Report", "Thorough Academic Research"])
|
| 161 |
tone = st.selectbox("🎯 Tone", ["Objective", "Persuasive", "Narrative"])
|
| 162 |
source_type = st.selectbox("π Sources", ["Web Only", "Academic Only", "Hybrid"])
|
| 163 |
-
custom_domains = st.text_input("π Optional
|
| 164 |
-
research_button = st.button("π
|
| 165 |
|
|
|
|
| 166 |
st.title("π Research Output")
|
| 167 |
|
| 168 |
if research_button and topic:
|
| 169 |
try:
|
| 170 |
-
with st.status("
|
| 171 |
all_sources = []
|
| 172 |
if source_type in ["Web Only", "Hybrid"]:
|
| 173 |
all_sources += get_sources(topic, custom_domains) if custom_domains.strip() else get_sources(topic)
|
|
@@ -182,67 +183,74 @@ if research_button and topic:
|
|
| 182 |
merged = sort_sources_chronologically(merged)
|
| 183 |
chronological_progress = build_chronological_progression(merged)
|
| 184 |
|
| 185 |
-
#
|
| 186 |
-
st.
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
st.image(m["image_url"], caption=m["title"], use_container_width=True)
|
| 193 |
-
image_shown = True
|
| 194 |
-
if not image_shown:
|
| 195 |
-
st.info("ℹ️ No image previews available.")
|
| 196 |
-
|
| 197 |
-
# --- Building Final Prompt ---
|
| 198 |
-
citations = [generate_apa_citation(m.get('title'), m.get('url'), m.get('source'), m.get('year')) for m in merged]
|
| 199 |
-
combined_text = "\n\n".join([
|
| 200 |
-
f"- [{m['title']}]({m['url']}) ({m.get('year', 'n.d.')})\n> {m.get('snippet', m.get('summary', ''))[:300]}..."
|
| 201 |
-
for m in merged
|
| 202 |
])
|
|
|
|
| 203 |
length_instruction = {
|
| 204 |
-
"Summary": "Keep it
|
| 205 |
-
"Detailed Report": "
|
| 206 |
-
"Thorough Academic Research": "
|
| 207 |
}[report_type]
|
| 208 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
prompt = f"""
|
| 210 |
-
|
| 211 |
|
| 212 |
-
|
|
|
|
| 213 |
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
2. Gap Identification: what is missing.
|
| 217 |
-
3. Novel Contribution: suggest improvements.-- Suggest a Novel topic based on the gaps and provide an overview on how a researcher can benefit from the topic
|
| 218 |
-
4. Structured Report: headings, clarity- with all headings and discussions like a proper research paper
|
| 219 |
|
| 220 |
-
|
| 221 |
-
|
| 222 |
|
| 223 |
-
|
| 224 |
-
|
| 225 |
|
| 226 |
-
|
| 227 |
-
|
| 228 |
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
|
| 233 |
-
# --- Generate
|
| 234 |
st.subheader(f"π {report_type} on '{topic}'")
|
| 235 |
output_placeholder = st.empty()
|
| 236 |
-
|
| 237 |
for chunk in call_llm([{"role": "user", "content": prompt}]):
|
| 238 |
-
|
| 239 |
-
output_placeholder.markdown(
|
| 240 |
|
| 241 |
-
st.session_state
|
|
|
|
| 242 |
|
| 243 |
except Exception as e:
|
| 244 |
st.error(f"β Error: {e}")
|
| 245 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 246 |
|
| 247 |
# 🧠 Initialize session state
|
| 248 |
if "last_report" not in st.session_state:
|
|
|
|
| 12 |
from io import BytesIO
|
| 13 |
from fpdf import FPDF
|
| 14 |
import base64
|
| 15 |
+
import uuid
|
| 16 |
from duckduckgo_search import DDGS
|
| 17 |
|
| 18 |
# --- Load API Keys ---
|
|
|
|
| 23 |
|
| 24 |
# --- Helper Functions ---
|
| 25 |
|
| 26 |
+
def call_llm(messages, model="deepseek/deepseek-chat-v3-0324:free", max_tokens=4000, temperature=0.7):
|
| 27 |
url = "https://openrouter.ai/api/v1/chat/completions"
|
| 28 |
headers = {
|
| 29 |
"Authorization": f"Bearer {OPENROUTER_API_KEY}",
|
|
|
|
| 115 |
years = re.findall(r"\b(19|20)\d{2}\b", text)
|
| 116 |
return int(years[0]) if years else 9999
|
| 117 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
def merge_duplicates(entries):
|
| 119 |
unique = []
|
| 120 |
seen_titles = []
|
|
|
|
| 141 |
summary += f"**{year}**\n{entries}\n\n"
|
| 142 |
return summary.strip()
|
| 143 |
|
| 144 |
+
# --- Initialize Streamlit Session ---
|
| 145 |
+
st.set_page_config(page_title="🧠 Deep Research Assistant 2.0", layout="centered")
|
| 146 |
|
| 147 |
+
if "memory_bank" not in st.session_state:
|
| 148 |
+
st.session_state.memory_bank = []
|
| 149 |
|
| 150 |
+
if "chat_threads" not in st.session_state:
|
| 151 |
+
st.session_state.chat_threads = {}
|
| 152 |
+
|
| 153 |
+
if "current_thread_id" not in st.session_state:
|
| 154 |
+
st.session_state.current_thread_id = None
|
| 155 |
+
|
| 156 |
+
# --- Sidebar Inputs ---
|
| 157 |
with st.sidebar:
|
| 158 |
+
st.title("Deep Research Assistant 2.0 π")
|
| 159 |
topic = st.text_input("π Enter your research topic")
|
| 160 |
report_type = st.selectbox("π Report Type", ["Summary", "Detailed Report", "Thorough Academic Research"])
|
| 161 |
tone = st.selectbox("🎯 Tone", ["Objective", "Persuasive", "Narrative"])
|
| 162 |
source_type = st.selectbox("π Sources", ["Web Only", "Academic Only", "Hybrid"])
|
| 163 |
+
custom_domains = st.text_input("π Optional Domains", placeholder="example.com, nature.com")
|
| 164 |
+
research_button = st.button("π Start Research")
|
| 165 |
|
| 166 |
+
# --- Main Logic ---
|
| 167 |
st.title("π Research Output")
|
| 168 |
|
| 169 |
if research_button and topic:
|
| 170 |
try:
|
| 171 |
+
with st.status("π Gathering sources..."):
|
| 172 |
all_sources = []
|
| 173 |
if source_type in ["Web Only", "Hybrid"]:
|
| 174 |
all_sources += get_sources(topic, custom_domains) if custom_domains.strip() else get_sources(topic)
|
|
|
|
| 183 |
merged = sort_sources_chronologically(merged)
|
| 184 |
chronological_progress = build_chronological_progression(merged)
|
| 185 |
|
| 186 |
+
# Prepare previous learnings
|
| 187 |
+
previous_learnings = "\n\n".join(st.session_state.memory_bank[-5:]) # last 5 learnings
|
| 188 |
+
|
| 189 |
+
citations = [f"- {s['title']} ({s['year']}) [{s['source']}]({s['url']})" for s in merged]
|
| 190 |
+
sources_text = "\n".join([
|
| 191 |
+
f"- [{s['title']}]({s['url']}) ({s['year']})\n> {s.get('snippet', s.get('summary', ''))[:300]}..."
|
| 192 |
+
for s in merged
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
])
|
| 194 |
+
|
| 195 |
length_instruction = {
|
| 196 |
+
"Summary": "Keep it concise, under 300 words.",
|
| 197 |
+
"Detailed Report": "Write 500-700 words with critical insights.",
|
| 198 |
+
"Thorough Academic Research": "Craft a full academic paper >1000 words."
|
| 199 |
}[report_type]
|
| 200 |
|
| 201 |
+
# Create Thread ID
|
| 202 |
+
thread_id = str(uuid.uuid4())
|
| 203 |
+
st.session_state.current_thread_id = thread_id
|
| 204 |
+
st.session_state.chat_threads[thread_id] = []
|
| 205 |
+
|
| 206 |
+
# --- LLM Prompt ---
|
| 207 |
prompt = f"""
|
| 208 |
+
You are an expert research assistant.
|
| 209 |
|
| 210 |
+
🔵 Past Knowledge:
|
| 211 |
+
{previous_learnings}
|
| 212 |
|
| 213 |
+
🔵 New Research Topic:
|
| 214 |
+
{topic}
|
|
|
|
|
|
|
|
|
|
| 215 |
|
| 216 |
+
🔵 Writing Style:
|
| 217 |
+
{tone} tone, {length_instruction}
|
| 218 |
|
| 219 |
+
🔵 Research Timeline:
|
| 220 |
+
{chronological_progress}
|
| 221 |
|
| 222 |
+
🔵 Sources:
|
| 223 |
+
{sources_text}
|
| 224 |
|
| 225 |
+
🔵 Citations:
|
| 226 |
+
{chr(10).join(citations)}
|
| 227 |
+
"""
|
| 228 |
|
| 229 |
+
# --- Generate Report ---
|
| 230 |
st.subheader(f"π {report_type} on '{topic}'")
|
| 231 |
output_placeholder = st.empty()
|
| 232 |
+
final_output = ""
|
| 233 |
for chunk in call_llm([{"role": "user", "content": prompt}]):
|
| 234 |
+
final_output += chunk
|
| 235 |
+
output_placeholder.markdown(final_output, unsafe_allow_html=True)
|
| 236 |
|
| 237 |
+
st.session_state.memory_bank.append(final_output)
|
| 238 |
+
st.session_state.chat_threads[thread_id].append({"role": "assistant", "content": final_output})
|
| 239 |
|
| 240 |
except Exception as e:
|
| 241 |
st.error(f"β Error: {e}")
|
| 242 |
|
| 243 |
+
# --- Show Chat Threads ---
|
| 244 |
+
st.divider()
|
| 245 |
+
st.subheader("π Your Research Threads")
|
| 246 |
+
|
| 247 |
+
for tid, chats in st.session_state.chat_threads.items():
|
| 248 |
+
with st.expander(f"Thread {tid[:8]}"):
|
| 249 |
+
for msg in chats:
|
| 250 |
+
role = "🧑 You" if msg['role'] == 'user' else "🤖 Assistant"
|
| 251 |
+
st.markdown(f"**{role}:** {msg['content']}")
|
| 252 |
+
|
| 253 |
+
|
| 254 |
|
| 255 |
# 🧠 Initialize session state
|
| 256 |
if "last_report" not in st.session_state:
|