Update app.py
app.py
CHANGED
@@ -93,4 +93,130 @@ def get_semantic_papers(query):
     papers = response.json().get("data", [])
     return [{
         "title": p.get("title"),
-        "summary":
+        "summary": p.get("abstract", "No abstract available"),
+        "url": p.get("url")
+    } for p in papers]
+
+def generate_apa_citation(title, url, source):
+    year = datetime.datetime.now().year
+    label = {
+        "arxiv": "*arXiv*", "semantic": "*Semantic Scholar*", "web": "*Web Source*"
+    }.get(source, "*Web*")
+    return f"{title}. ({year}). {label}. {url}"
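+# Example (illustrative): generate_apa_citation("Attention Is All You Need",
+#     "https://arxiv.org/abs/1706.03762", "arxiv")
+# -> "Attention Is All You Need. (<current year>). *arXiv*. https://arxiv.org/abs/1706.03762"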
+
+def check_plagiarism(text, topic):
+    hits = []
+    for r in get_sources(topic, ""):
+        similarity = fuzz.token_set_ratio(text, r["snippet"])
+        if similarity >= 75:
+            hits.append(r)
+    return hits
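+# Note (illustrative): token_set_ratio ignores word order and repeated words,
+# so short generic snippets can score high; 75 is a heuristic cutoff, not proof
+# of copying.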
+
+def remove_duplicates(entries):
+    unique = []
+    titles = []
+    for e in entries:
+        if all(fuzz.token_set_ratio(e["title"], t) < 85 for t in titles):
+            titles.append(e["title"])
+            unique.append(e)
+    return unique
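+# Example (illustrative): "Attention Is All You Need" vs. "Attention is all
+# you need" scores near 100, so the second entry is dropped; unrelated titles
+# score well below the 85 cutoff and both are kept.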
+
+def generate_image_from_topic(topic):
+    # Unsplash Source returns a random featured photo matching the keywords.
+    # NOTE: this endpoint has been deprecated by Unsplash and may no longer
+    # resolve; swap in another image service here if requests start failing.
+    return f"https://source.unsplash.com/featured/?{quote_plus(topic)}"
+
+def generate_pdf(text):
+    pdf = FPDF()
+    pdf.add_page()
+    pdf.set_auto_page_break(auto=True, margin=15)
+    pdf.set_font("Arial", size=12)
+    for line in text.split("\n"):
+        # The core Arial font is Latin-1 only; replace unsupported characters
+        # so LLM output with curly quotes or emoji cannot raise an encoding error.
+        pdf.multi_cell(0, 10, line.encode("latin-1", "replace").decode("latin-1"))
+    buffer = BytesIO()
+    pdf.output(buffer)
+    buffer.seek(0)
+    return buffer
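+# Illustrative: generate_pdf("line one\nline two") yields a BytesIO whose
+# bytes can be passed directly to st.download_button, as done below.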
+
+# --- Execution ---
+if st.button("Research"):
+    try:
+        with st.spinner("🔍 Gathering relevant research..."):
+            all_entries = []
+            citations = []
+
+            if source_type in ["Web Only", "Hybrid"]:
+                web_data = get_sources(topic, custom_domains)
+                web_data = remove_duplicates(web_data)
+                for w in web_data:
+                    all_entries.append({
+                        "title": w['title'],
+                        "summary": w['snippet'],
+                        "url": w['url'],
+                        "source": "web"
+                    })
+                    citations.append(generate_apa_citation(w['title'], w['url'], "web"))
+
+            if source_type in ["Academic Only", "Hybrid"]:
+                arxiv_data = get_arxiv_papers(topic)
+                semantic_data = get_semantic_papers(topic)
+                academic_data = remove_duplicates(arxiv_data + semantic_data)
+                for a in academic_data:
+                    entry = {
+                        "title": a['title'],
+                        "summary": a['summary'],
+                        "url": a['url'],
+                        "source": "arxiv" if "arxiv" in a['url'] else "semantic"
+                    }
+                    all_entries.append(entry)
+                    citations.append(generate_apa_citation(entry['title'], entry['url'], entry['source']))
+
+        st.success("✅ Data collected and filtered!")
+
+        with st.spinner("🧠 Writing final research report..."):
+            sources_text = ""
+            for e in all_entries:
+                sources_text += f"- [{e['title']}]({e['url']})\n> {e['summary'][:300]}...\n\n"
+
+            prompt = f"""
+# Research Task: {topic}
+Tone: {tone}
+Report Type: {report_type}
+Sources:
+{sources_text}
+Now, synthesize:
+1. Research questions and gap
+2. A novel insight or direction
+3. A real-world application scenario
+4. A {report_type.lower()} in paragraph format (use bullet points only if the paragraph is too long).
+Use larger headings for sections and slightly smaller ones for sub-sections. Do not use markdown or HTML, just plain text.
+"""
+            output = call_llm([{"role": "user", "content": prompt}], max_tokens=3500)
+
+        st.header("📄 Research Report")
+        st.write(output)
+
+        st.subheader("📚 APA Citations")
+        for c in citations:
+            st.markdown(f"- {c}")
+
+        with st.spinner("🧪 Checking for overlaps..."):
+            overlaps = check_plagiarism(output, topic)
+            if overlaps:
+                st.warning("⚠️ Potential content overlap found.")
+                for h in overlaps:
+                    st.markdown(f"**{h['title']}** - [{h['url']}]({h['url']})")
+            else:
+                st.success("✅ No major overlaps detected.")
+
+        if report_type.startswith("Thorough"):
+            st.subheader("🖼️ Related Visual")
+            image_url = generate_image_from_topic(topic)
+            st.image(image_url, caption=f"Visual related to: {topic}", use_column_width=True)
+
+        st.subheader("📥 Download Options")
+        pdf_file = generate_pdf(output)
+        st.download_button("📄 Download PDF", data=pdf_file, file_name=f"{topic}_report.pdf", mime="application/pdf")
+        st.download_button("📝 Download LaTeX (raw text)", data=output, file_name=f"{topic}_report.tex", mime="text/plain")
+
+    except Exception as e:
+        st.error(f"Error: {e}")
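The hunk calls several helpers defined earlier in app.py that this diff does not show (get_sources, get_arxiv_papers, call_llm). For reference, a minimal sketch of a call_llm consistent with the call site above, assuming an OpenAI-compatible backend; the client, model name, and environment variable are placeholders, not the Space's actual implementation:

    import os
    from openai import OpenAI

    client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])  # assumed credential

    def call_llm(messages, max_tokens=1024):
        # Takes chat-format messages and returns plain text, matching the usage above.
        response = client.chat.completions.create(
            model="gpt-4o-mini",  # placeholder model
            messages=messages,
            max_tokens=max_tokens,
        )
        return response.choices[0].message.content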