anasfsd123 committed on
Commit
fa4e803
·
verified ·
1 Parent(s): 1dcf963

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +422 -0
app.py ADDED
@@ -0,0 +1,422 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ import requests
4
+ import hashlib
5
+ from typing import List, Dict, Any
6
+ from datetime import datetime
7
+ import json
8
+ import re
9
+ from urllib.parse import quote
10
+ import time
11
+ import random
12
+ import functools
13
+
14
+ # Import required libraries
15
+ from crewai import Agent, Task, Crew, Process
16
+ from crewai.tools import BaseTool
17
+ import nltk
18
+ from textstat import flesch_reading_ease, flesch_kincaid_grade
19
+ from bs4 import BeautifulSoup
20
+ import concurrent.futures
21
+ from duckduckgo_search import DDGS
22
+
23
+ # Import Ollama and LangChain components
24
+ from langchain_community.chat_models import ChatOllama
25
+ from langchain_core.prompts import ChatPromptTemplate
26
+ from langchain_core.output_parsers import StrOutputParser
27
+
28
+ # Download NLTK data
29
# Best-effort fetch of the NLTK corpora. A failure here (e.g. no network) is
# deliberately ignored so the app can still start.
# NOTE(review): nothing in the visible code calls into nltk after this point;
# confirm the corpora are actually needed before keeping the download.
try:
    nltk.download('punkt', quiet=True)
    nltk.download('stopwords', quiet=True)
    nltk.download('wordnet', quiet=True)
except Exception:
    # Catch Exception rather than using a bare ``except`` so that
    # KeyboardInterrupt / SystemExit still propagate during startup.
    pass
35
+
36
+ # Custom Tools for CrewAI
37
class WebSearchTool(BaseTool):
    """CrewAI tool that runs a DuckDuckGo text search and returns the hits as JSON."""

    name: str = "web_search"
    description: str = "Search the web for content to check plagiarism"

    def _run(self, query: str) -> str:
        """Search DuckDuckGo for ``query``.

        Returns a JSON array of ``{'title', 'body', 'url'}`` objects (at most
        five), or the string ``"Search failed: <error>"`` if anything raises.
        """
        try:
            # Fixed one-second pause before every query to throttle requests.
            time.sleep(1)

            with DDGS() as ddgs:
                hits = list(ddgs.text(query, max_results=5))
                payload = [
                    {
                        'title': hit.get('title', ''),
                        'body': hit.get('body', ''),
                        # The search result exposes its link under 'href'.
                        'url': hit.get('href', ''),
                    }
                    for hit in hits
                ]
                return json.dumps(payload)
        except Exception as e:
            return f"Search failed: {str(e)}"
59
+
60
class TextAnalysisTool(BaseTool):
    """CrewAI tool that reports word/sentence counts and Flesch readability metrics."""

    name: str = "text_analysis"
    description: str = "Analyze text for readability and quality metrics"

    def _run(self, text: str) -> str:
        """Analyze ``text`` and return the metrics as a JSON object.

        The JSON object has the keys ``word_count``, ``sentence_count``,
        ``avg_words_per_sentence``, ``flesch_reading_ease``,
        ``flesch_kincaid_grade`` and ``readability_level``. If anything
        raises, the string ``"Analysis failed: <error>"`` is returned instead.
        """
        try:
            # Readability scores come from the textstat helpers.
            flesch_score = flesch_reading_ease(text)
            fk_grade = flesch_kincaid_grade(text)

            words = text.split()
            # Drop empty fragments: "One. Two." splits into ['One', ' Two', '']
            # and the trailing '' must not be counted as a sentence.
            sentences = [part for part in text.split('.') if part.strip()]

            analysis = {
                'word_count': len(words),
                'sentence_count': len(sentences),
                # max(..., 1) avoids dividing by zero for text without sentences.
                'avg_words_per_sentence': len(words) / max(len(sentences), 1),
                'flesch_reading_ease': flesch_score,
                'flesch_kincaid_grade': fk_grade,
                'readability_level': self._get_readability_level(flesch_score)
            }

            return json.dumps(analysis)
        except Exception as e:
            return f"Analysis failed: {str(e)}"

    def _get_readability_level(self, score):
        """Map a Flesch Reading Ease ``score`` to a descriptive label."""
        # (inclusive lower bound, label), checked from easiest to hardest.
        bands = (
            (90, "Very Easy"),
            (80, "Easy"),
            (70, "Fairly Easy"),
            (60, "Standard"),
            (50, "Fairly Difficult"),
            (30, "Difficult"),
        )
        for lower_bound, label in bands:
            if score >= lower_bound:
                return label
        return "Very Difficult"
96
+
97
class PlagiarismChecker(BaseTool):
    """CrewAI tool that flags sentences whose wording overlaps with web search results."""

    name: str = "plagiarism_checker"
    description: str = "Check text for potential plagiarism by comparing with web content"

    def _run(self, text: str, search_results: str) -> str:
        """Compare each sentence of ``text`` against ``search_results``.

        Args:
            text: The text to check; it is split into sentences on ``'.'``.
            search_results: JSON array of objects with ``title``, ``body`` and
                ``url`` keys (the format ``WebSearchTool`` produces).

        Returns:
            A JSON object with ``plagiarism_score`` (percentage of sentences
            flagged), ``total_sentences``, ``flagged_sentences`` and
            ``flagged_content`` (the three highest-scoring matches), or the
            string ``"Plagiarism check failed: <error>"`` if anything raises,
            including ``search_results`` not being valid JSON.
        """
        try:
            results = json.loads(search_results)
            text_sentences = [s.strip() for s in text.split('.') if s.strip()]
            total_sentences = len(text_sentences)

            plagiarism_results = []
            for sentence in text_sentences:
                if len(sentence.split()) < 5:  # Skip very short sentences
                    continue

                for result in results:
                    content = result.get('body', '') + ' ' + result.get('title', '')

                    # Compute the score once and reuse it for the report.
                    score = self._calculate_similarity(sentence, content)
                    if score > 0.7:
                        plagiarism_results.append({
                            'sentence': sentence,
                            'source': result.get('url', 'Unknown'),
                            'similarity_score': score
                        })
                        # One matching source is enough to flag the sentence.
                        break

            # Each flagged sentence contributes exactly one entry.
            flagged_sentences = len(plagiarism_results)
            # Short (skipped) sentences still count towards the denominator.
            plagiarism_score = (flagged_sentences / max(total_sentences, 1)) * 100

            # Strongest matches first, so the slice below really is the top 3.
            plagiarism_results.sort(
                key=lambda match: match['similarity_score'], reverse=True
            )

            return json.dumps({
                'plagiarism_score': plagiarism_score,
                'total_sentences': total_sentences,
                'flagged_sentences': flagged_sentences,
                'flagged_content': plagiarism_results[:3]  # Return top 3 matches
            })
        except Exception as e:
            return f"Plagiarism check failed: {str(e)}"

    def _calculate_similarity(self, text1: str, text2: str) -> float:
        """Return the Jaccard similarity of the lower-cased word sets of two texts.

        Words are whitespace-separated tokens; punctuation is not stripped.
        Returns 0.0 when either text contains no words.
        """
        words1 = set(text1.lower().split())
        words2 = set(text2.lower().split())

        if not words1 or not words2:
            return 0.0

        # Both sets are non-empty here, so the union cannot be empty.
        return len(words1 & words2) / len(words1 | words2)
153
+
154
+ # Rate limit handling decorator (can be kept for other potential API calls, though not strictly needed for local Ollama)
155
def rate_limit_handler(max_retries=5, base_delay=2, max_delay=60):
    """Build a decorator that retries a call when it fails with a rate-limit error.

    An exception counts as a rate-limit error when its lower-cased message
    contains ``"rate_limit"`` or ``"429"``. Any other exception is re-raised
    immediately without retrying.

    Args:
        max_retries: Maximum number of attempts. Values below 1 are treated
            as 1 so the wrapped function is always called at least once.
        base_delay: Base of the exponential backoff, in seconds.
        max_delay: Upper bound on a single backoff sleep, in seconds.

    Returns:
        A decorator. The decorated function returns whatever the wrapped
        function returns, or raises the last exception once attempts run out.
    """
    # Previously max_retries <= 0 skipped the loop entirely and silently
    # returned None without ever calling the wrapped function.
    attempts = max(1, max_retries)

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(attempts):
                try:
                    return func(*args, **kwargs)
                except Exception as e:
                    error_message = str(e).lower()
                    is_rate_limit = (
                        "rate_limit" in error_message or "429" in error_message
                    )
                    if not is_rate_limit:
                        # Bare ``raise`` keeps the original traceback intact.
                        raise
                    if attempt >= attempts - 1:
                        st.error(f"Max retries reached for rate limit: {e}")
                        raise
                    # Exponential backoff with up to 1s of jitter, capped.
                    delay = min(max_delay, base_delay * (2 ** attempt) + random.uniform(0, 1))
                    st.warning(f"Rate limit hit. Retrying in {delay:.1f} seconds... (Attempt {attempt + 1}/{attempts})")
                    time.sleep(delay)
        return wrapper
    return decorator
177
+
178
+ # No custom LLM wrapper class is needed for CrewAI here:
179
+ # the former GroqLLM class was removed and ChatOllama is passed to the agents directly.
180
+
181
+ # Simplified agents for better token management
182
def create_agents(llm):
    """Build the two CrewAI agents used by the app.

    Args:
        llm: The language model object handed to both agents.

    Returns:
        A ``(analysis_agent, paraphrasing_agent)`` tuple. Only the analysis
        agent is given tools (web search, plagiarism check, text analysis).
    """
    # Settings shared by both agents.
    common = {"verbose": True, "allow_delegation": False, "llm": llm}

    # Combined analysis agent (plagiarism + quality metrics).
    analyzer = Agent(
        role="Content Analyzer",
        goal="Analyze text for plagiarism and quality metrics",
        backstory="You are an expert in content analysis and plagiarism detection.",
        tools=[WebSearchTool(), PlagiarismChecker(), TextAnalysisTool()],
        **common,
    )

    # Paraphrasing agent: works without tools.
    rewriter = Agent(
        role="Content Rewriter",
        goal="Rewrite text to be original while maintaining meaning",
        backstory="You are an expert writer who creates original content.",
        **common,
    )

    return analyzer, rewriter
207
+
208
def create_tasks(input_text, agents, max_words=350):
    """Create the analysis and paraphrasing tasks for the given agents.

    Args:
        input_text: The user's text. It is cut to the first ``max_words``
            whitespace-separated words (with ``"..."`` appended) when longer.
        agents: ``(analysis_agent, paraphrasing_agent)`` as returned by
            ``create_agents``.
        max_words: Word limit applied to ``input_text`` before it is embedded
            in the task prompts. Defaults to 350.

    Returns:
        ``[analysis_task, paraphrasing_task]`` in execution order.
    """
    analysis_agent, paraphrasing_agent = agents

    # Truncate input text if too long (split once and reuse the result).
    words = input_text.split()
    if len(words) > max_words:
        input_text = ' '.join(words[:max_words]) + "..."

    # Task 1: Combined Analysis
    analysis_task = Task(
        description=f"""
        Analyze this text briefly:

        Text: {input_text}

        Provide:
        1. Basic plagiarism check
        2. Readability score
        3. Word count

        Keep response under 200 words.
        """,
        agent=analysis_agent,
        expected_output="Brief analysis with plagiarism score and readability metrics"
    )

    # Task 2: Paraphrasing
    paraphrasing_task = Task(
        description=f"""
        Rewrite this text to be original:

        Original: {input_text}

        Requirements:
        1. Maintain meaning
        2. Use different words
        3. Keep it clear and readable

        Provide only the rewritten text.
        """,
        agent=paraphrasing_agent,
        expected_output="Paraphrased text that maintains original meaning",
        # CrewAI's Task takes upstream tasks through ``context``; the
        # previous ``dependencies`` keyword is not a documented Task field.
        context=[analysis_task]
    )

    return [analysis_task, paraphrasing_task]
255
+
256
def run_crew_analysis(input_text, selected_model):
    """Run the two-agent crew on ``input_text`` with the chosen Ollama model.

    Args:
        input_text: Text to analyze and paraphrase.
        selected_model: Model name passed to ``ChatOllama``.

    Returns:
        Whatever ``crew.kickoff()`` returns, or ``None`` after reporting the
        error through ``st.error`` if any step raises.
    """
    try:
        # The Ollama server must be running with the model already pulled
        # (e.g. `ollama run llama2`).
        ollama_llm = ChatOllama(model=selected_model)

        agent_pair = create_agents(ollama_llm)
        task_list = create_tasks(input_text, agent_pair)

        # Tasks run one after another in list order.
        crew = Crew(
            agents=[*agent_pair],
            tasks=task_list,
            process=Process.sequential,
            verbose=True,
        )

        # Show a spinner while the crew is working.
        with st.spinner("Analyzing text with AI agents..."):
            return crew.kickoff()
    except Exception as err:
        st.error(f"Error in crew analysis: {err!s}")
        return None
285
+
286
+ # Streamlit UI
287
def main():
    """Render the Streamlit page and drive the analyze/paraphrase workflow.

    The sidebar selects the Ollama model. The left column collects the input
    text and triggers ``run_crew_analysis``. The right column shows the most
    recent result kept in ``st.session_state`` (keys ``analysis_result``,
    ``original_text`` and ``model_used``).
    """
    st.set_page_config(
        page_title="AI Paraphrasing & Plagiarism Checker",
        page_icon="🤖",
        layout="wide"
    )

    st.title("🤖 AI-Powered Paraphrasing & Plagiarism Checker")
    st.markdown("*Built with CrewAI Multi-Agent Framework and Ollama (Local LLM)*")

    # Sidebar for configuration
    with st.sidebar:
        st.header("🔧 Configuration")

        # Model selection for Ollama
        st.markdown("**Ollama Setup:**\n\n1. Download and install Ollama from [ollama.ai](https://ollama.ai/).\n2. Run `ollama run <model_name>` in your terminal (e.g., `ollama run llama2` or `ollama run mistral`).\n3. Ensure the Ollama server is running before using this app.")

        model_options = [
            "llama2",   # A good general-purpose model
            "mistral",  # Another strong contender
            "phi3",     # Smaller, faster model for local use
            # Add other Ollama models as needed
        ]

        selected_model = st.selectbox(
            "Select Ollama Model",
            model_options,
            index=0,  # Default to llama2
            help="Choose an Ollama model you have pulled locally."
        )

        st.markdown("---")
        st.markdown("### 📊 Features")
        st.markdown("- Smart plagiarism detection")
        st.markdown("- Intelligent paraphrasing")
        st.markdown("- Readability analysis")
        st.markdown("- Local LLM support (Ollama)")

    # Main content area
    col1, col2 = st.columns([1, 1])

    with col1:
        st.header("📝 Input Text")

        # Text length warning
        st.info("💡 For best results, keep text under 400 words")

        # Text input
        input_text = st.text_area(
            "Enter text to analyze and paraphrase:",
            height=300,
            placeholder="Paste your text here (max 400 words recommended)..."
        )

        # Show word count
        if input_text:
            word_count = len(input_text.split())
            if word_count > 400:
                st.warning(f"⚠️ Text has {word_count} words. Consider shortening for optimal results.")
            else:
                st.success(f"✅ Text has {word_count} words!")

        # Analysis button
        if st.button("🚀 Analyze & Paraphrase", type="primary", use_container_width=True):
            if not input_text.strip():
                st.error("Please enter some text to analyze!")
            else:
                # Run analysis with selected Ollama model
                result = run_crew_analysis(input_text, selected_model)

                if result:
                    st.session_state.analysis_result = result
                    st.session_state.original_text = input_text
                    # Remember which model produced this result, so the
                    # results pane stays accurate if the sidebar selection
                    # changes afterwards.
                    st.session_state.model_used = selected_model
                    st.success("✅ Analysis completed!")

    with col2:
        st.header("📊 Analysis Results")

        if "analysis_result" in st.session_state:
            result = st.session_state.analysis_result

            # Display results in tabs
            tab1, tab2 = st.tabs(["📝 Paraphrased Text", "📈 Analysis"])

            # The crew result is rendered through its string form.
            paraphrased_text = str(result)

            with tab1:
                st.subheader("📝 Paraphrased Text")

                st.text_area(
                    "Paraphrased version:",
                    value=paraphrased_text,
                    height=300,
                    help="This is the AI-generated paraphrased version"
                )

                # Download button
                st.download_button(
                    label="📥 Download Paraphrased Text",
                    data=paraphrased_text,
                    file_name="paraphrased_text.txt",
                    mime="text/plain"
                )

            with tab2:
                st.subheader("📈 Analysis Summary")

                # Display quick stats
                original_words = len(st.session_state.original_text.split())
                paraphrased_words = len(paraphrased_text.split())

                col_a, col_b = st.columns(2)
                with col_a:
                    st.metric("Original Words", original_words)
                    st.metric("Processing Status", "✅ Complete")

                with col_b:
                    st.metric("Paraphrased Words", paraphrased_words)
                    # Fall back to the current selection for results stored
                    # before ``model_used`` was recorded.
                    st.metric(
                        "Model Used",
                        st.session_state.get("model_used", selected_model)
                    )

                # Simple comparison chart
                st.bar_chart({
                    "Original": [original_words],
                    "Paraphrased": [paraphrased_words]
                })
        else:
            st.info("👈 Enter text and click 'Analyze & Paraphrase' to see results")


if __name__ == "__main__":
    main()
422
+