Update app.py
app.py
CHANGED
@@ -5,7 +5,6 @@ import datetime
 from dotenv import load_dotenv
 from tavily import TavilyClient
 import feedparser
-import time
 from fuzzywuzzy import fuzz
 from PIL import Image
 from io import BytesIO
@@ -44,14 +43,12 @@ def get_sources(topic, domains=None):
         domain_filters = [d.strip() for d in domains.split(",") if d.strip()]
         query += " site:" + " OR site:".join(domain_filters)
     response = tavily.search(query=query, search_depth="advanced", max_results=10)
-
-
-
-
-
-
-    })
-    return sources
+    return [{
+        "title": r["title"],
+        "url": r["url"],
+        "snippet": r.get("content", ""),
+        "source": "web"
+    } for r in response.get("results", [])]

 def get_arxiv_papers(query):
     from urllib.parse import quote_plus
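The reworked `get_sources` now returns the source dicts straight from the Tavily response instead of building a separate `sources` list. A minimal standalone sketch of the same call path, not part of the commit, assuming `TAVILY_API_KEY` is set in the environment (app.py loads it via `load_dotenv()`):

```python
# Standalone sketch of the reshaped get_sources() result (illustration only).
# Assumes TAVILY_API_KEY is set; app.py loads it with load_dotenv().
import os
from tavily import TavilyClient

tavily = TavilyClient(api_key=os.getenv("TAVILY_API_KEY"))
response = tavily.search(query="small language models site:arxiv.org",
                         search_depth="advanced", max_results=10)
sources = [{"title": r["title"], "url": r["url"],
            "snippet": r.get("content", ""), "source": "web"}
           for r in response.get("results", [])]
for s in sources:
    print(f"[{s['source']}] {s['title']} -> {s['url']}")
```

Tagging each dict with `"source": "web"` here is what lets the later citation step pick the right label without the old per-item append loop.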
@@ -60,19 +57,24 @@ def get_arxiv_papers(query):
     return [{
         "title": e.title,
         "summary": e.summary.replace("\n", " ").strip(),
-        "url": next((l.href for l in e.links if l.type == "application/pdf"), "")
+        "url": next((l.href for l in e.links if l.type == "application/pdf"), ""),
+        "source": "arxiv"
     } for e in feed.entries]

 def get_semantic_papers(query):
-
-
-
-
-
-
-
-
-
+    try:
+        url = "https://api.semanticscholar.org/graph/v1/paper/search"
+        params = {"query": query, "limit": 5, "fields": "title,abstract,url"}
+        response = requests.get(url, params=params)
+        papers = response.json().get("data", [])
+        return [{
+            "title": p.get("title"),
+            "summary": p.get("abstract", "No abstract available"),
+            "url": p.get("url"),
+            "source": "semantic"
+        } for p in papers]
+    except:
+        return []

 def check_plagiarism(text, topic):
     hits = []
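The new `get_semantic_papers` calls the Semantic Scholar Graph API paper-search endpoint through `requests`, which is presumably imported near the top of app.py outside this diff. A minimal sketch of the same request, with a narrower error path than the bare `except` above:

```python
# Standalone sketch of the Semantic Scholar lookup added above (illustration only).
import requests

def search_semantic_scholar(query, limit=5):
    url = "https://api.semanticscholar.org/graph/v1/paper/search"
    params = {"query": query, "limit": limit, "fields": "title,abstract,url"}
    try:
        resp = requests.get(url, params=params, timeout=10)
        resp.raise_for_status()
    except requests.RequestException:
        return []  # mirror the app's fallback of returning an empty list
    return resp.json().get("data", [])

if __name__ == "__main__":
    for paper in search_semantic_scholar("retrieval augmented generation"):
        print(paper.get("title"), "->", paper.get("url"))
```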
@@ -84,9 +86,7 @@ def check_plagiarism(text, topic):

 def generate_apa_citation(title, url, source):
     year = datetime.datetime.now().year
-    label = {
-        "arxiv": "*arXiv*", "semantic": "*Semantic Scholar*", "web": "*Web Source*"
-    }.get(source, "*Web*")
+    label = {"arxiv": "*arXiv*", "semantic": "*Semantic Scholar*", "web": "*Web Source*"}.get(source, "*Web*")
     return f"{title}. ({year}). {label}. {url}"

 def merge_duplicates(entries):
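This hunk ends at `merge_duplicates(entries)`, whose body is unchanged and not shown in the diff. Since app.py imports `fuzz` from fuzzywuzzy, one plausible, purely hypothetical shape for that helper is a fuzzy-title dedup:

```python
# Hypothetical sketch of merge_duplicates(); the real implementation in app.py
# is not part of this diff. Assumes fuzzywuzzy's token_set_ratio on titles.
from fuzzywuzzy import fuzz

def merge_duplicates(entries, threshold=90):
    merged = []
    for entry in entries:
        title = (entry.get("title") or "").lower()
        if any(fuzz.token_set_ratio(title, (kept.get("title") or "").lower()) >= threshold
               for kept in merged):
            continue  # drop entries whose titles closely match one already kept
        merged.append(entry)
    return merged
```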
@@ -126,143 +126,69 @@ def generate_download_button(file, label, mime_type):
     """

 # --- Streamlit UI ---
-st.set_page_config("Deep Research Bot", layout="
+st.set_page_config("Deep Research Bot", layout="centered")
 st.markdown("""
 <style>
-
-
-
-}
-
-.stApp {
-    background-color: #1e2a38;
-    color: #ffffff;
-}
-
-/* Text fields, dropdowns, and inputs */
-.stTextInput > div > div > input,
-.stSelectbox > div > div > div > div {
-    background-color: #ffffff;
-    color: #1e2a38;
-}
-
-/* Fix labels in sidebar to show on dark background */
-.stSidebar label,
-.stTextInput label,
-.stSelectbox label,
-.stTextArea label {
-    color: #1e2a38 !important;
-    font-weight: bold;
-}
-
-/* Optional: Remove outline color on focus to match dark theme */
-input:focus, select:focus {
-    outline: none !important;
-    box-shadow: 0 0 0 2px #4f46e5 !important; /* Optional focus ring */
-}
+.stApp { background-color: #0f172a; color: white; }
+h1, h2, h3 { color: #facc15; }
+.css-1d391kg, .css-1kyxreq, .css-q8sbsg { background-color: #1e293b; color: white; border-radius: 10px; padding: 10px; }
 </style>
 """, unsafe_allow_html=True)

 with st.sidebar:
-    st.title("Deep Research Assistant")
-    topic = st.text_input("
-    report_type = st.selectbox("Type
-
-
-
-
-    tone = st.selectbox("Tone of the report", [
-        "Objective - Impartial and unbiased presentation of facts and findings",
-        "Persuasive - Advocating a specific point of view",
-        "Narrative - Storytelling tone for layperson readers"
-    ])
-    source_type = st.selectbox("Sources to include", ["Web Only", "Academic Only", "Hybrid"])
-    custom_domains = st.text_input("Query Domains (Optional)", placeholder="techcrunch.com, forbes.com")
-    research_button = st.button("Research")
+    st.title("Deep Research Assistant")
+    topic = st.text_input("Enter your research topic")
+    report_type = st.selectbox("Report Type", ["Summary", "Detailed Report", "Thorough Academic Research"])
+    tone = st.selectbox("Tone", ["Objective", "Persuasive", "Narrative"])
+    source_type = st.selectbox("Sources", ["Web Only", "Academic Only", "Hybrid"])
+    custom_domains = st.text_input("Optional Web Domains", placeholder="example.com, techcrunch.com")
+    research_button = st.button("Run Deep Research")

 st.title("Research Output")

 if research_button and topic:
     try:
-        with st.
-            st.info("Fetching from sources...")
-
+        with st.spinner("Gathering sources and analyzing deeply..."):
             all_sources = []
-            citations = []
-
             if source_type in ["Web Only", "Hybrid"]:
-
-                for item in web_data:
-                    all_sources.append(item | {"source": "web"})
-
+                all_sources += get_sources(topic, custom_domains)
             if source_type in ["Academic Only", "Hybrid"]:
-
-
-                    all_sources.append(item | {"source": "arxiv"})
-                semantic_data = get_semantic_papers(topic)
-                for item in semantic_data:
-                    all_sources.append(item | {"source": "semantic"})
+                all_sources += get_arxiv_papers(topic)
+                all_sources += get_semantic_papers(topic)

             merged = merge_duplicates(all_sources)
-
-            for m in merged
-                combined_text += f"- [{m['title']}]({m['url']})\n> {m.get('snippet', m.get('summary', ''))[:300]}...\n\n"
-                citations.append(generate_apa_citation(m['title'], m['url'], m['source']))
-
-        with st.spinner("Synthesizing report..."):
-            if report_type == "Summary - Short and fast )":
-                prompt = f"""
-You are a research assistant. Based on the following sources related to the topic **{topic}**, provide a concise overview.
-Analyze and summarize based on the selected sources: {'Web Only' if source_type == 'Web Only' else 'Academic Only' if source_type == 'Academic Only' else 'Hybrid (Web + Academic)'}.
-Use a clear and accessible tone suitable for readers who want a quick understanding.
-
-"""
-
-            elif report_type == "Detailed Report ":
-                prompt = f"""
-You are a research analyst tasked with writing a structured research brief on the topic **{topic}**.
-Use the following sources ({'Web Only' if source_type == 'Web Only' else 'Academic Only' if source_type == 'Academic Only' else 'Hybrid'}) to:
-1. Write an **Introduction/Abstract** giving context and importance of the topic.
-2. Identify and explain the **Research Gap** present in the existing knowledge or implementations.
-3. Propose a **Novel Insight or Contribution** that can address the research gap.
-4. Include a section for **Citations in APA format** using the sources provided.
-
-"""
-
-            else: # Thorough Academic Research
-                prompt = f"""
-You are an expert researcher writing a full academic paper on the topic **{topic}** using sources from {'Web Only' if source_type == 'Web Only' else 'Academic Only' if source_type == 'Academic Only' else 'Hybrid'}.
-
-
-2. **Research Gap**: Identify current gaps or underexplored areas in this field.
-3. **Novelty/Contribution**: Describe the new idea, method, or perspective this paper offers.
-4. **Methodology**: Outline methods, models, or frameworks that can be applied to achieve this novelty.
-5. **Comparative Analysis** *(if applicable)*: Compare existing models/methods with the proposed one.
-6. **Future Directions**: Suggest further exploration paths or follow-up research.
-7. **Citations**: Include in-text references and a citation section in **APA format** only.
-
-
-        st.
+            citations = [generate_apa_citation(m['title'], m['url'], m['source']) for m in merged]
+            combined_text = "\n\n".join([f"- [{m['title']}]({m['url']})\n> {m.get('snippet', m.get('summary', ''))[:300]}..." for m in merged])
+
+            prompt = f"""
+You are an expert assistant. Based on the following sources, write a {report_type.lower()} in a {tone.lower()} tone on the topic: {topic}.
+
+Sources:
+{combined_text}
+
+APA Citations:
+{chr(10).join(citations)}
+"""
+
+        st.subheader(f"{report_type} on '{topic}'")
+        final_output = call_llm([{"role": "user", "content": prompt}])
         st.markdown(final_output, unsafe_allow_html=True)

-        if report_type == "Thorough Academic Research
-
-
-
-
-            st.markdown(generate_download_button(latex_file, "Research_Report.tex", "application/x-latex"), unsafe_allow_html=True)
+        if report_type == "Thorough Academic Research":
+            st.markdown("---")
+            st.subheader("Downloads")
+            st.markdown(generate_download_button(generate_pdf(final_output), "Research_Report.pdf", "application/pdf"), unsafe_allow_html=True)
+            st.markdown(generate_download_button(generate_latex(final_output), "Research_Report.tex", "application/x-latex"), unsafe_allow_html=True)

         overlaps = check_plagiarism(final_output, topic)
+        st.markdown("---")
+        st.subheader("Plagiarism Check")
         if overlaps:
             st.warning("Potential overlaps detected:")
             for hit in overlaps:
                 st.markdown(f"- [{hit['title']}]({hit['url']})")
         else:
             st.success("No major overlaps found.")

     except Exception as e:
-        st.error(f"Error: {e}")
+        st.error(f"Error occurred: {e}")
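The rewritten UI flow leans on helpers defined elsewhere in app.py and untouched by this commit: `call_llm`, `generate_pdf`, `generate_latex`, and `generate_download_button`. As a rough sketch of how the `call_llm` step could be wired, assuming an OpenAI-compatible client and an API key loaded by `load_dotenv()` (the actual provider and model are not shown in the diff):

```python
# Hypothetical sketch of call_llm(); the real helper lives elsewhere in app.py.
# Provider, model name, and env var are assumptions, not taken from this commit.
import os
from openai import OpenAI

def call_llm(messages, model="gpt-4o-mini"):
    client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
    response = client.chat.completions.create(model=model, messages=messages)
    return response.choices[0].message.content
```

Since app.py builds `messages` as a single user turn (`[{"role": "user", "content": prompt}]`), any chat-completions-style backend drops into this shape the same way.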