Files changed (1) hide show
  1. app.py +29 -0
app.py CHANGED
@@ -3,6 +3,7 @@ import streamlit as st
3
  import requests
4
  import feedparser
5
  import datetime
 
6
  from dotenv import load_dotenv
7
  from duckduckgo_search import DDGS
8
 
@@ -34,6 +35,23 @@ def call_llm(messages, model="deepseek/deepseek-chat-v3-0324:free", max_tokens=2
34
  raise RuntimeError(f"Invalid response: {result}")
35
  return result["choices"][0]["message"]["content"]
36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  # --- Source Utilities ---
38
  def get_arxiv_papers(query, max_results=3):
39
  from urllib.parse import quote_plus
@@ -161,6 +179,17 @@ if st.button("Run Research Agent"):
161
  st.session_state.chat_history.append({"role": "user", "content": topic})
162
  st.session_state.chat_history.append({"role": "assistant", "content": response})
163
  st.markdown(response)
 
 
 
 
 
 
 
 
 
 
 
164
  except Exception as e:
165
  st.error(f"Failed: {e}")
166
 
 
3
  import requests
4
  import feedparser
5
  import datetime
6
+ from fuzzywuzzy import fuzz
7
  from dotenv import load_dotenv
8
  from duckduckgo_search import DDGS
9
 
 
35
  raise RuntimeError(f"Invalid response: {result}")
36
  return result["choices"][0]["message"]["content"]
37
 
38
def check_plagiarism(text, query, threshold=70):
    """Flag web search results whose snippet closely overlaps with ``text``.

    Calls ``search_duckduckgo(query, max_results=5)`` and scores each
    result's snippet against ``text`` with ``fuzz.token_set_ratio``.

    Args:
        text: The content to check for overlap.
        query: Search query used to fetch candidate web results.
        threshold: Minimum similarity score a result must reach to be
            reported. Defaults to 70.

    Returns:
        A list of dicts, one per result scoring at or above ``threshold``,
        in search-result order. Each dict has the keys ``"title"``,
        ``"url"``, ``"snippet"`` and ``"similarity"``. The list is empty
        when nothing reaches the threshold.
    """
    # Treat a missing/empty result set as "no hits" instead of failing when
    # iterating over None.
    web_results = search_duckduckgo(query, max_results=5) or []
    plagiarized_snippets = []

    for result in web_results:
        # Normalise a missing or None snippet to an empty string.
        snippet = result.get("snippet") or ""
        # NOTE(review): token_set_ratio is intersection-based, so a short
        # snippet whose words all occur in a long ``text`` may score very
        # high -- confirm this metric/threshold gives acceptable
        # false-positive rates for full-length responses.
        similarity = fuzz.token_set_ratio(text, snippet)
        if similarity < threshold:
            continue

        plagiarized_snippets.append({
            # Use .get() like the snippet lookup above so that one result
            # lacking a title or URL does not abort the whole check.
            "title": result.get("title", ""),
            "url": result.get("url", ""),
            "snippet": snippet,
            "similarity": similarity,
        })

    return plagiarized_snippets
54
+
55
  # --- Source Utilities ---
56
  def get_arxiv_papers(query, max_results=3):
57
  from urllib.parse import quote_plus
 
179
  st.session_state.chat_history.append({"role": "user", "content": topic})
180
  st.session_state.chat_history.append({"role": "assistant", "content": response})
181
  st.markdown(response)
182
+ # Check for plagiarism (optional feature)
183
+ plagiarism_hits = check_plagiarism(response, topic)
184
+
185
+ if plagiarism_hits:
186
+ st.warning("⚠️ Potential overlap with existing web content detected.")
187
+ st.subheader("🕵️ Plagiarism Check Results")
188
+ for hit in plagiarism_hits:
189
+ st.markdown(f"**{hit['title']}** - [{hit['url']}]({hit['url']})")
190
+ st.markdown(f"> _Similarity: {hit['similarity']}%_\n\n{hit['snippet']}")
191
+ else:
192
+ st.success("✅ No significant overlaps found. Content appears original.")
193
  except Exception as e:
194
  st.error(f"Failed: {e}")
195