Spaces:

Ani14
/

AutoReasearcher

Sleeping

App Files Files Community

Ani14 committed on Apr 23

Commit

afadb32

verified ·

1 Parent(s): 952c032

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -10

app.py CHANGED Viewed

@@ -21,7 +21,7 @@ TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
 tavily = TavilyClient(api_key=TAVILY_API_KEY)
 # --- Helper Functions ---
-def call_llm(messages, model="deepseek/deepseek-chat-v3-0324:free", max_tokens=3500, temperature=0.7):
     url = "https://openrouter.ai/api/v1/chat/completions"
     headers = {
         "Authorization": f"Bearer {OPENROUTER_API_KEY}",
@@ -86,9 +86,10 @@ def get_sources(topic, domains=None):
             "url": r["url"],
             "snippet": r.get("content", ""),
             "image_url": image_url,
-            "source": "web"
         })
     return results
 def get_arxiv_papers(query):
@@ -99,24 +100,32 @@ def get_arxiv_papers(query):
         "title": e.title,
         "summary": e.summary.replace("\n", " ").strip(),
         "url": next((l.href for l in e.links if l.type == "application/pdf"), ""),
-        "source": "arxiv"
     } for e in feed.entries]
 def get_semantic_papers(query):
     try:
         url = "https://api.semanticscholar.org/graph/v1/paper/search"
-        params = {"query": query, "limit": 5, "fields": "title,abstract,url"}
         response = requests.get(url, params=params)
         papers = response.json().get("data", [])
         return [{
             "title": p.get("title"),
             "summary": p.get("abstract", "No abstract available"),
             "url": p.get("url"),
-            "source": "semantic"
         } for p in papers]
     except:
         return []
 def check_plagiarism(text, topic):
     hits = []
     for r in get_sources(topic):
@@ -125,8 +134,8 @@ def check_plagiarism(text, topic):
             hits.append(r)
     return hits
-def generate_apa_citation(title, url, source):
-    year = datetime.datetime.now().year
     label = {"arxiv": "*arXiv*", "semantic": "*Semantic Scholar*", "web": "*Web Source*"}.get(source, "*Web*")
     return f"{title}. ({year}). {label}. {url}"
@@ -187,6 +196,9 @@ def generate_download_button(file, label, mime_type):
         </a>
     """
 # --- Streamlit UI ---
 st.set_page_config("Deep Research Assistant", layout="centered")
@@ -237,6 +249,7 @@ if research_button and topic:
                 raise ValueError("❌ No sources found.")
             merged = merge_duplicates(all_sources)
         # 🔹 Image previews
         st.subheader("🖼 Source Previews")
@@ -251,12 +264,13 @@ if research_button and topic:
             st.info("ℹ️ No image previews available.")
         # 🔹 Generate report
-        citations = [generate_apa_citation(m['title'], m['url'], m['source']) for m in merged]
         combined_text = "\n\n".join([
-            f"- [{m['title']}]({m['url']})\n> {m.get('snippet', m.get('summary', ''))[:300]}..."
             for m in merged
         ])
         prompt = f"""
         You are an expert research assistant.

 tavily = TavilyClient(api_key=TAVILY_API_KEY)
 # --- Helper Functions ---
+def call_llm(messages, model="deepseek/deepseek-chat-v3-0324:free", max_tokens=20000, temperature=0.7):
     url = "https://openrouter.ai/api/v1/chat/completions"
     headers = {
         "Authorization": f"Bearer {OPENROUTER_API_KEY}",
             "url": r["url"],
             "snippet": r.get("content", ""),
             "image_url": image_url,
+            "source": "web",
+            "year": extract_year_from_text(r.get("content", ""))
         })
     return results
 def get_arxiv_papers(query):
         "title": e.title,
         "summary": e.summary.replace("\n", " ").strip(),
         "url": next((l.href for l in e.links if l.type == "application/pdf"), ""),
+        "source": "arxiv",
+        "year": int(e.published[:4]) if 'published' in e else 9999
     } for e in feed.entries]
 def get_semantic_papers(query):
     """Search Semantic Scholar for papers matching *query*.

     Requests at most five results with the title, abstract, url and year
     fields, and returns one dict per paper with the keys ``title``,
     ``summary``, ``url``, ``source`` (always ``"semantic"``) and ``year``.

     This is a best-effort lookup: any failure (network error, invalid JSON,
     unexpected payload shape) yields an empty list instead of raising.
     """
     url = "https://api.semanticscholar.org/graph/v1/paper/search"
     params = {"query": query, "limit": 5, "fields": "title,abstract,url,year"}
     try:
         # A timeout keeps a stalled API call from hanging the app forever.
         response = requests.get(url, params=params, timeout=10)
         # `or []` covers a payload whose "data" key is present but null.
         papers = response.json().get("data") or []
         return [{
             "title": p.get("title"),
             # dict.get only applies its default when the key is absent; an
             # explicit null would come back as None, so use `or` for both
             # fallbacks to keep later slicing and sorting safe.
             "summary": p.get("abstract") or "No abstract available",
             "url": p.get("url"),
             "source": "semantic",
             "year": p.get("year") or 9999,
         } for p in papers]
     except Exception:
         # Deliberate best-effort: swallow ordinary errors, but unlike a bare
         # `except:` let KeyboardInterrupt / SystemExit propagate.
         return []
def extract_year_from_text(text):
    """Return the first four-digit year (1900-2099) that appears in *text*.

    Returns the sentinel 9999 when *text* is not a string or contains no
    standalone four-digit number starting with 19 or 20.
    """
    import re

    if not isinstance(text, str):
        return 9999
    # The group must be non-capturing: with a capturing "(19|20)" the regex
    # functions report only the century prefix ("19"/"20"), not the year.
    match = re.search(r"\b(?:19|20)\d{2}\b", text)
    return int(match.group()) if match else 9999
 def check_plagiarism(text, topic):
     hits = []
     for r in get_sources(topic):
             hits.append(r)
     return hits
def generate_apa_citation(title, url, source, year=None):
    """Build a citation string of the form ``Title. (Year). Label. URL``.

    *source* selects the label: ``"arxiv"``, ``"semantic"`` and ``"web"``
    have dedicated labels; anything else is labelled ``*Web*``.

    When *year* is missing (``None``/falsy) or is the 9999 "unknown year"
    sentinel, the current year is used instead.
    """
    # 9999 is truthy, so a plain `year or <fallback>` would print "(9999)"
    # in the citation; treat the sentinel the same as a missing year.
    if not year or year == 9999:
        year = datetime.datetime.now().year
    labels = {
        "arxiv": "*arXiv*",
        "semantic": "*Semantic Scholar*",
        "web": "*Web Source*",
    }
    label = labels.get(source, "*Web*")
    return f"{title}. ({year}). {label}. {url}"
         </a>
     """
def sort_sources_chronologically(sources):
    """Return a new list of *sources* ordered by ascending ``year``.

    Sources whose year is absent, ``None`` or not convertible to an int are
    treated as year 9999 and therefore sort last. The sort is stable and the
    input list is not modified.
    """
    def _year_key(source):
        # A "year" key holding None (or other junk) would make sorted()
        # raise TypeError when compared with ints, so normalise it first.
        try:
            return int(source.get("year"))
        except (TypeError, ValueError):
            return 9999

    return sorted(sources, key=_year_key)
 # --- Streamlit UI ---
 st.set_page_config("Deep Research Assistant", layout="centered")
                 raise ValueError("❌ No sources found.")
             merged = merge_duplicates(all_sources)
+            merged = sort_sources_chronologically(merged)
         # 🔹 Image previews
         st.subheader("🖼 Source Previews")
             st.info("ℹ️ No image previews available.")
         # 🔹 Generate report
+        citations = [generate_apa_citation(m['title'], m['url'], m['source'], m.get('year')) for m in merged]
         combined_text = "\n\n".join([
+            f"- [{m['title']}]({m['url']}) ({m.get('year', 'n.d.')})\n> {m.get('snippet', m.get('summary', ''))[:300]}..."
             for m in merged
         ])
         prompt = f"""
         You are an expert research assistant.