sanketshinde3001 committed on
Commit b04beef · verified · 1 Parent(s): 26043fc

Create app.py

Files changed (1)
  1. app.py +288 -0
app.py ADDED
@@ -0,0 +1,288 @@
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from transformers import pipeline
import difflib
import spacy
import re
from nltk.sentiment import SentimentIntensityAnalyzer
import nltk
from collections import Counter
import uvicorn

# Download NLTK resources
try:
    nltk.download('vader_lexicon', quiet=True)
    nltk.download('punkt', quiet=True)
    nltk.download('stopwords', quiet=True)
except Exception:
    print("Could not download NLTK resources. Some features may be limited.")

app = FastAPI()

# Configure CORS
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],   # Allows all origins
    allow_credentials=True,
    allow_methods=["*"],   # Allows all methods
    allow_headers=["*"],   # Allows all headers
)

# Load NLP models
try:
    # Load text humanization model
    humanize_pipe = pipeline("text2text-generation", model="danibor/flan-t5-base-humanizer")

    # Load spaCy model
    nlp = spacy.load("en_core_web_sm")

    # Initialize sentiment analyzer
    sentiment_analyzer = SentimentIntensityAnalyzer()

    print("All NLP models loaded successfully!")
except Exception as e:
    print(f"Error loading models: {e}")
    # Fall back to None so the endpoints can fail with a clear error
    humanize_pipe = None
    nlp = None
    sentiment_analyzer = None

# Define request models
class TextRequest(BaseModel):
    text: str

class HumanizeResponse(BaseModel):
    original_text: str
    humanized_text: str
    diff: list
    original_word_count: int
    humanized_word_count: int
    nlp_analysis: dict

class AnalyzeResponse(BaseModel):
    text: str
    word_count: int
    sentiment: dict
    entities: dict
    key_phrases: list
    readability: dict
    complexity: dict

@app.post("/humanize", response_model=HumanizeResponse)
async def humanize_text(request: TextRequest):
    if humanize_pipe is None or nlp is None:
        raise HTTPException(status_code=503, detail="NLP models are not loaded.")

    input_text = request.text

    try:
        # Generate humanized text
        result = humanize_pipe(input_text, max_length=500, do_sample=True)
        humanized_text = result[0]['generated_text']

        # Get the differences
        diff = get_diff(input_text, humanized_text)

        # Process both texts with NLP
        nlp_analysis = perform_nlp_analysis(input_text, humanized_text)

        return {
            'original_text': input_text,
            'humanized_text': humanized_text,
            'diff': diff,
            'original_word_count': len(input_text.split()),
            'humanized_word_count': len(humanized_text.split()),
            'nlp_analysis': nlp_analysis
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error processing text: {str(e)}")

def get_diff(text1, text2):
    """
    Generate a list of word-level changes between two texts.
    Returns a list of dicts {'operation': ..., 'text': ...},
    where operation is '+' for addition, '-' for deletion, or ' ' for unchanged.
    """
    d = difflib.Differ()
    diff = list(d.compare(text1.split(), text2.split()))

    result = []
    for item in diff:
        operation = item[0]
        if operation in ['+', '-', ' ']:
            text = item[2:]
            result.append({'operation': operation, 'text': text})

    return result

def perform_nlp_analysis(original_text, humanized_text):
    """
    Perform comprehensive NLP analysis on both the original and humanized text.
    """
    # Process both texts with spaCy
    original_doc = nlp(original_text)
    humanized_doc = nlp(humanized_text)

    # Sentiment analysis
    original_sentiment = sentiment_analyzer.polarity_scores(original_text)
    humanized_sentiment = sentiment_analyzer.polarity_scores(humanized_text)

    # Extract named entities
    original_entities = extract_entities(original_doc)
    humanized_entities = extract_entities(humanized_doc)

    # Extract key phrases using noun chunks
    original_phrases = extract_key_phrases(original_doc)
    humanized_phrases = extract_key_phrases(humanized_doc)

    # Readability metrics
    original_readability = calculate_readability(original_text)
    humanized_readability = calculate_readability(humanized_text)

    # Complexity metrics
    original_complexity = analyze_complexity(original_doc)
    humanized_complexity = analyze_complexity(humanized_doc)

    # Compile all results
    return {
        'original': {
            'sentiment': original_sentiment,
            'entities': original_entities,
            'key_phrases': original_phrases,
            'readability': original_readability,
            'complexity': original_complexity
        },
        'humanized': {
            'sentiment': humanized_sentiment,
            'entities': humanized_entities,
            'key_phrases': humanized_phrases,
            'readability': humanized_readability,
            'complexity': humanized_complexity
        }
    }

def extract_entities(doc):
    """Extract and categorize named entities from a spaCy document."""
    entities = {}
    for ent in doc.ents:
        if ent.label_ not in entities:
            entities[ent.label_] = []
        if ent.text not in entities[ent.label_]:
            entities[ent.label_].append(ent.text)
    return entities

def extract_key_phrases(doc):
    """Extract key phrases using noun chunks."""
    return [chunk.text for chunk in doc.noun_chunks][:10]  # Limit to the first 10

def calculate_readability(text):
    """Calculate basic readability metrics."""
    # Count sentences
    sentences = len(nltk.sent_tokenize(text))
    if sentences == 0:
        sentences = 1  # Avoid division by zero

    # Count words
    words = len(text.split())
    if words == 0:
        words = 1  # Avoid division by zero

    # Average words per sentence
    avg_words_per_sentence = words / sentences

    # Count syllables (simplified approach)
    syllables = count_syllables(text)

    # Calculate Flesch Reading Ease
    flesch = 206.835 - 1.015 * (words / sentences) - 84.6 * (syllables / words)

    return {
        'sentence_count': sentences,
        'word_count': words,
        'avg_words_per_sentence': round(avg_words_per_sentence, 2),
        'syllable_count': syllables,
        'flesch_reading_ease': round(flesch, 2)
    }

def count_syllables(text):
    """Count syllables in text (simplified approach)."""
    # Keep letters only, then split into words
    text = text.lower()
    text = re.sub(r'[^a-z]', ' ', text)
    words = text.split()

    count = 0
    for word in words:
        word = word.strip()
        if not word:
            continue

        # Drop a trailing silent 'e' before counting
        if word[-1] == 'e':
            word = word[:-1]

        # Count vowel groups as syllables, with a minimum of one per word
        vowel_count = len(re.findall(r'[aeiouy]+', word))
        if vowel_count == 0:
            vowel_count = 1

        count += vowel_count

    return count

def analyze_complexity(doc):
    """Analyze text complexity using POS tags and dependency parsing."""
    # Count POS tags
    pos_counts = Counter(token.pos_ for token in doc)

    # Calculate lexical diversity (unique tokens / total tokens)
    total_tokens = len(doc)
    unique_tokens = len({token.text.lower() for token in doc})
    lexical_diversity = unique_tokens / total_tokens if total_tokens > 0 else 0

    # Count dependency relationship types
    dep_counts = Counter(token.dep_ for token in doc)

    return {
        'pos_distribution': dict(pos_counts),
        'lexical_diversity': round(lexical_diversity, 4),
        'dependency_types': dict(dep_counts)
    }

@app.post("/analyze", response_model=AnalyzeResponse)
async def analyze_text(request: TextRequest):
    """Analyze text without humanizing it."""
    if nlp is None or sentiment_analyzer is None:
        raise HTTPException(status_code=503, detail="NLP models are not loaded.")

    input_text = request.text

    try:
        # Process text with NLP
        doc = nlp(input_text)

        # Analyze text
        sentiment = sentiment_analyzer.polarity_scores(input_text)
        entities = extract_entities(doc)
        key_phrases = extract_key_phrases(doc)
        readability = calculate_readability(input_text)
        complexity = analyze_complexity(doc)

        return {
            'text': input_text,
            'word_count': len(input_text.split()),
            'sentiment': sentiment,
            'entities': entities,
            'key_phrases': key_phrases,
            'readability': readability,
            'complexity': complexity
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error analyzing text: {str(e)}")

# Add a root endpoint for Hugging Face Spaces health check
@app.get("/")
async def root():
    return {"message": "Text Analysis and Humanization API is running!"}

# For local development
if __name__ == "__main__":
    uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=True)
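
A minimal client sketch for exercising the two endpoints, assuming the app is running locally on port 7860 as in the `__main__` block above; the `requests` dependency and the sample input are illustrative and not part of this commit:

# Hypothetical client for the API above (not part of the commit).
import requests

BASE_URL = "http://localhost:7860"  # assumption: local dev server from the __main__ block
sample = {"text": "The quarterly report indicates a significant increase in revenue."}

# Humanize the text and inspect the word-level diff
resp = requests.post(f"{BASE_URL}/humanize", json=sample)
resp.raise_for_status()
data = resp.json()
print(data["humanized_text"])
print(data["diff"][:5])  # first few {'operation', 'text'} entries

# Analyze only, without humanizing
resp = requests.post(f"{BASE_URL}/analyze", json=sample)
resp.raise_for_status()
print(resp.json()["readability"]["flesch_reading_ease"])

The response bodies mirror the HumanizeResponse and AnalyzeResponse models defined in app.py.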