Ani14 committed on
Commit
27f01b8
Β·
verified Β·
1 Parent(s): 2b97b69

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +127 -1
app.py CHANGED
@@ -93,4 +93,130 @@ def get_semantic_papers(query):
93
  papers = response.json().get("data", [])
94
  return [{
95
  "title": p.get("title"),
96
- "summary":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  papers = response.json().get("data", [])
94
  return [{
95
  "title": p.get("title"),
96
+ "summary": p.get("abstract", "No abstract available"),
97
+ "url": p.get("url")
98
+ } for p in papers]
99
+
100
def generate_apa_citation(title, url, source):
    """Build a one-line APA-style citation for a gathered source.

    Args:
        title: Title of the cited work.
        url: Link to the work.
        source: Source kind — "arxiv", "semantic" or "web"; any other
            value falls back to a generic web label.

    Returns:
        A citation string stamped with the current calendar year.
    """
    current_year = datetime.datetime.now().year
    if source == "arxiv":
        venue = "*arXiv*"
    elif source == "semantic":
        venue = "*Semantic Scholar*"
    elif source == "web":
        venue = "*Web Source*"
    else:
        venue = "*Web*"
    return f"{title}. ({current_year}). {venue}. {url}"
106
+
107
def check_plagiarism(text, topic):
    """Return the sources whose snippets heavily overlap with *text*.

    Re-queries the web sources for *topic* and keeps every result whose
    fuzzy token-set similarity against *text* reaches 75 or more.
    """
    return [
        result
        for result in get_sources(topic, "")
        if fuzz.token_set_ratio(text, result["snippet"]) >= 75
    ]
114
+
115
def remove_duplicates(entries):
    """Drop entries whose title fuzzily matches an earlier entry's title.

    Two titles count as duplicates when their token-set ratio is 85 or
    higher; the first occurrence in *entries* is the one kept.
    """
    kept = []
    seen_titles = []
    for entry in entries:
        is_duplicate = any(
            fuzz.token_set_ratio(entry["title"], seen) >= 85
            for seen in seen_titles
        )
        if not is_duplicate:
            seen_titles.append(entry["title"])
            kept.append(entry)
    return kept
123
+
124
def generate_image_from_topic(topic):
    """Return a stock-image URL loosely illustrating *topic*.

    Uses Unsplash's featured-image endpoint; the topic is URL-encoded
    with quote_plus so spaces and special characters are safe in the
    query string.

    Args:
        topic: Free-text research topic.

    Returns:
        The Unsplash featured-image URL for the topic.
    """
    # Fixed: the original built an `img_prompt` string that was never
    # used anywhere — dead local removed.
    return f"https://source.unsplash.com/featured/?{quote_plus(topic)}"
128
+
129
def generate_pdf(text):
    """Render *text* into a PDF and return it as an in-memory buffer.

    Each newline-separated line of *text* becomes a wrapped multi-cell
    row in a 12pt Arial document with automatic page breaks. The buffer
    is rewound before returning so callers can stream it directly.
    """
    doc = FPDF()
    doc.add_page()
    doc.set_auto_page_break(auto=True, margin=15)
    doc.set_font("Arial", size=12)
    for row in text.split("\n"):
        doc.multi_cell(0, 10, row)
    out = BytesIO()
    doc.output(out)
    out.seek(0)
    return out
140
+
141
# --- Execution ---
# Top-level Streamlit handler: gathers web/academic sources, asks the LLM
# for a synthesized report, checks for content overlap, and offers
# downloads. Relies on widgets defined earlier in the file (topic, tone,
# report_type, source_type, custom_domains).
if st.button("Research"):
    try:
        with st.spinner("🔍 Gathering relevant research..."):
            all_entries = []
            citations = []

            # Web search results via get_sources(), de-duplicated by title.
            if source_type in ["Web Only", "Hybrid"]:
                web_data = get_sources(topic, custom_domains)
                web_data = remove_duplicates(web_data)
                for w in web_data:
                    all_entries.append({
                        "title": w['title'],
                        "summary": w['snippet'],
                        "url": w['url'],
                        "source": "web"
                    })
                    citations.append(generate_apa_citation(w['title'], w['url'], "web"))

            # Academic results from arXiv + Semantic Scholar, de-duplicated.
            if source_type in ["Academic Only", "Hybrid"]:
                arxiv_data = get_arxiv_papers(topic)
                semantic_data = get_semantic_papers(topic)
                academic_data = remove_duplicates(arxiv_data + semantic_data)
                for a in academic_data:
                    # BUG FIX: the original passed a['source'] to
                    # generate_apa_citation, but the raw paper dicts only
                    # carry title/summary/url ('source' was only added to
                    # the copy pushed into all_entries), so every academic
                    # run raised KeyError. Derive the label once and reuse it.
                    src = "arxiv" if "arxiv" in a['url'] else "semantic"
                    all_entries.append({
                        "title": a['title'],
                        "summary": a['summary'],
                        "url": a['url'],
                        "source": src
                    })
                    citations.append(generate_apa_citation(a['title'], a['url'], src))

        st.success("✅ Data collected and filtered!")

        with st.spinner("🧠 Writing final research report..."):
            # Digest every source into a linked-title + truncated-summary
            # list for the LLM prompt (summaries capped at 300 chars).
            sources_text = ""
            for e in all_entries:
                sources_text += f"- [{e['title']}]({e['url']})\n> {e['summary'][:300]}...\n\n"

            prompt = f"""
# Research Task: {topic}
Tone: {tone}
Report Type: {report_type}
Sources:
{sources_text}
Now, synthesize:
1. Research questions and gap
2. A novel insight or direction
3. A real-world application scenario
4. A {report_type.lower()} in paragraph format (use bullet points only if the paragraph is too long).
Use larger heading for sections and slightly smaller for sub-sections. Do not use markdown or HTML, just plain text.
"""
            output = call_llm([{"role": "user", "content": prompt}], max_tokens=3500)

        st.header("📄 Research Report")
        st.write(output)

        st.subheader("📚 APA Citations")
        for c in citations:
            st.markdown(f"- {c}")

        with st.spinner("🧪 Checking for overlaps..."):
            overlaps = check_plagiarism(output, topic)
            if overlaps:
                st.warning("⚠️ Potential content overlap found.")
                for h in overlaps:
                    st.markdown(f"**{h['title']}** - [{h['url']}]({h['url']})")
            else:
                st.success("✅ No major overlaps detected.")

        # A topical stock image is only shown for the thorough report type.
        if report_type.startswith("Thorough"):
            st.subheader("🖼️ Related Visual")
            image_url = generate_image_from_topic(topic)
            st.image(image_url, caption=f"Visual related to: {topic}", use_column_width=True)

        st.subheader("📥 Download Options")
        pdf_file = generate_pdf(output)
        st.download_button("📄 Download PDF", data=pdf_file, file_name=f"{topic}_report.pdf", mime="application/pdf")
        st.download_button("📜 Download LaTeX (raw text)", data=output, file_name=f"{topic}_report.tex", mime="text/plain")

    except Exception as e:
        # Top-level boundary: surface any failure to the UI rather than crash.
        st.error(f"Error: {e}")