pythonprincess committed on
Commit
247ae88
·
verified ·
1 Parent(s): 5195b70

Delete sentiment_utils.py

Browse files
Files changed (1) hide show
  1. sentiment_utils.py +0 -450
sentiment_utils.py DELETED
@@ -1,450 +0,0 @@
1
- # models/sentiment/sentiment_utils.py
2
-
3
- """
4
- Sentiment Analysis Model Utilities for PENNY Project
5
- Handles text sentiment classification for user input analysis and content moderation.
6
- Provides async sentiment analysis with structured error handling and logging.
7
- """
8
-
9
- import asyncio
10
- import time
11
- import os
12
- import httpx
13
- from typing import Dict, Any, Optional, List
14
-
15
- # --- Logging Imports ---
16
- from app.logging_utils import log_interaction, sanitize_for_logging
17
-
18
- # --- Hugging Face API Configuration ---
19
- HF_API_URL = "https://api-inference.huggingface.co/models/cardiffnlp/twitter-roberta-base-sentiment"
20
- HF_TOKEN = os.getenv("HF_TOKEN")
21
-
22
- AGENT_NAME = "penny-sentiment-agent"
23
-
24
-
25
def is_sentiment_available() -> bool:
    """
    Report whether the sentiment API can be called.

    Returns:
        bool: True when HF_TOKEN holds a non-empty value, False otherwise.
    """
    # None and "" are both falsy, so plain truthiness covers the two
    # "not configured" cases in one step.
    return bool(HF_TOKEN)
33
-
34
-
35
async def get_sentiment_analysis(
    text: str,
    tenant_id: Optional[str] = None
) -> Dict[str, Any]:
    """
    Classify the sentiment of ``text`` via the Hugging Face Inference API.

    The text is POSTed to ``HF_API_URL`` using ``HF_TOKEN`` as a bearer token.
    Every outcome is reported through ``log_interaction``. Failures are
    returned as dictionaries instead of being raised; the only exception that
    propagates to the caller is ``asyncio.CancelledError``.

    Args:
        text: The string of text to analyze. Must be a non-blank string of at
            most 10,000 characters.
        tenant_id: Optional tenant identifier for logging.

    Returns:
        A dictionary containing:
            - label (str): "POSITIVE", "NEGATIVE" or "NEUTRAL" on success
              (labels without a mapping are passed through unchanged),
              "UNKNOWN" when the API is not configured, "ERROR" on failure
            - score (float): Confidence score of the chosen label (0.0 on failure)
            - available (bool): Whether the service was available
            - message (str, optional): Error message if analysis failed
            - error (str, optional): Exception text for unexpected failures
            - response_time_ms (int, optional): Elapsed time in milliseconds,
              present whenever the API request was attempted
    """
    # perf_counter() is monotonic, so the elapsed time cannot be skewed by
    # wall-clock adjustments the way time.time() differences can.
    start_time = time.perf_counter()

    def _elapsed_ms() -> int:
        """Milliseconds elapsed since this call started."""
        return int((time.perf_counter() - start_time) * 1000)

    # Check availability
    if not is_sentiment_available():
        log_interaction(
            intent="sentiment_analysis",
            tenant_id=tenant_id,
            success=False,
            error="Sentiment API not configured (missing HF_TOKEN)",
            fallback_used=True
        )
        return {
            "label": "UNKNOWN",
            "score": 0.0,
            "available": False,
            "message": "Sentiment analysis is temporarily unavailable."
        }

    # Validate input. The type is checked first so that non-string objects are
    # never truth-tested, and blank text is rejected just like in the batch API.
    if not isinstance(text, str) or not text.strip():
        log_interaction(
            intent="sentiment_analysis",
            tenant_id=tenant_id,
            success=False,
            error="Invalid text input"
        )
        return {
            "label": "ERROR",
            "score": 0.0,
            "available": True,
            "message": "Invalid text input provided."
        }

    # Check text length (prevent processing extremely long texts)
    if len(text) > 10000:  # 10k character limit
        log_interaction(
            intent="sentiment_analysis",
            tenant_id=tenant_id,
            success=False,
            error=f"Text too long: {len(text)} characters",
            text_preview=sanitize_for_logging(text[:100])
        )
        return {
            "label": "ERROR",
            "score": 0.0,
            "available": True,
            "message": "Text is too long for sentiment analysis (max 10,000 characters)."
        }

    try:
        # Prepare API request
        headers = {"Authorization": f"Bearer {HF_TOKEN}"}
        payload = {"inputs": text}

        # Call Hugging Face Inference API
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.post(HF_API_URL, json=payload, headers=headers)

        response_time_ms = _elapsed_ms()

        if response.status_code != 200:
            log_interaction(
                intent="sentiment_analysis",
                tenant_id=tenant_id,
                success=False,
                error=f"API returned status {response.status_code}",
                response_time_ms=response_time_ms,
                text_preview=sanitize_for_logging(text[:100]),
                fallback_used=True
            )
            return {
                "label": "ERROR",
                "score": 0.0,
                "available": False,
                "message": f"Sentiment API error: {response.status_code}",
                "response_time_ms": response_time_ms
            }

        results = response.json()

        # Validate results
        # API returns: [[{"label": "LABEL_2", "score": 0.95}, ...]]
        # A flat list of label dicts is accepted as well.
        format_error: Optional[str] = None
        candidates: List[Dict[str, Any]] = []
        if not isinstance(results, list) or not results:
            format_error = "Empty or invalid model output"
        else:
            result_list = results[0] if isinstance(results[0], list) else results
            if not result_list:
                format_error = "Empty result list"
            else:
                candidates = [
                    entry for entry in result_list
                    if isinstance(entry, dict) and 'label' in entry and 'score' in entry
                ]
                if not candidates:
                    format_error = "Invalid result structure"

        if format_error is not None:
            log_interaction(
                intent="sentiment_analysis",
                tenant_id=tenant_id,
                success=False,
                error=format_error,
                response_time_ms=response_time_ms,
                text_preview=sanitize_for_logging(text[:100])
            )
            return {
                "label": "ERROR",
                "score": 0.0,
                "available": True,
                "message": "Sentiment analysis returned unexpected format.",
                "response_time_ms": response_time_ms
            }

        # Select the highest-scoring entry explicitly instead of relying on
        # the API to return the entries sorted by score.
        result = max(candidates, key=lambda entry: float(entry['score']))
        # Convert before logging so a malformed score is reported as a failure
        # (via the generic handler below) and never logged as a success first.
        score = float(result['score'])

        # Map RoBERTa labels to readable format
        # LABEL_0 = NEGATIVE, LABEL_1 = NEUTRAL, LABEL_2 = POSITIVE
        label_mapping = {
            "LABEL_0": "NEGATIVE",
            "LABEL_1": "NEUTRAL",
            "LABEL_2": "POSITIVE"
        }
        label = label_mapping.get(result['label'], result['label'])

        # Log slow analysis
        if response_time_ms > 3000:  # 3 seconds
            log_interaction(
                intent="sentiment_analysis_slow",
                tenant_id=tenant_id,
                success=True,
                response_time_ms=response_time_ms,
                details="Slow sentiment analysis detected",
                text_length=len(text)
            )

        log_interaction(
            intent="sentiment_analysis",
            tenant_id=tenant_id,
            success=True,
            response_time_ms=response_time_ms,
            sentiment_label=label,
            sentiment_score=score,
            text_length=len(text)
        )

        return {
            "label": label,
            "score": score,
            "available": True,
            "response_time_ms": response_time_ms
        }

    except httpx.TimeoutException:
        response_time_ms = _elapsed_ms()
        log_interaction(
            intent="sentiment_analysis",
            tenant_id=tenant_id,
            success=False,
            error="Sentiment analysis request timed out",
            response_time_ms=response_time_ms,
            text_preview=sanitize_for_logging(text[:100]),
            fallback_used=True
        )
        return {
            "label": "ERROR",
            "score": 0.0,
            "available": False,
            "message": "Sentiment analysis request timed out.",
            "response_time_ms": response_time_ms
        }

    except asyncio.CancelledError:
        # Record the cancellation, then re-raise so it still reaches the caller.
        log_interaction(
            intent="sentiment_analysis",
            tenant_id=tenant_id,
            success=False,
            error="Analysis cancelled"
        )
        raise

    except Exception as e:
        # Boundary handler: any other failure (network error, invalid JSON,
        # malformed score, ...) is converted into an error result.
        response_time_ms = _elapsed_ms()

        log_interaction(
            intent="sentiment_analysis",
            tenant_id=tenant_id,
            success=False,
            error=str(e),
            response_time_ms=response_time_ms,
            text_preview=sanitize_for_logging(text[:100]),
            fallback_used=True
        )

        return {
            "label": "ERROR",
            "score": 0.0,
            "available": False,
            "message": "An error occurred during sentiment analysis.",
            "error": str(e),
            "response_time_ms": response_time_ms
        }
276
-
277
-
278
async def analyze_sentiment_batch(
    texts: List[str],
    tenant_id: Optional[str] = None
) -> Dict[str, Any]:
    """
    Classify the sentiment of several texts with one Inference API request.

    Entries of ``texts`` that are not strings or are blank are dropped, and at
    most the first 100 remaining texts are sent. Response items that do not
    contain a usable prediction are skipped, so ``results`` may be shorter
    than the number of texts sent. Every outcome is reported through
    ``log_interaction``. Failures are returned as dictionaries instead of
    being raised; the only exception that propagates to the caller is
    ``asyncio.CancelledError``.

    Args:
        texts: List of text strings to analyze.
        tenant_id: Optional tenant identifier for logging.

    Returns:
        A dictionary containing:
            - results (list): One ``{"label", "score"}`` dict per analyzed text
            - available (bool): Whether the service was available
            - total_analyzed (int): Number of texts successfully analyzed
            - message (str, optional): Error message if analysis failed
            - error (str, optional): Failure detail for timeouts and
              unexpected exceptions
            - response_time_ms (int, optional): Total batch analysis time,
              present whenever the API request was attempted
    """
    # perf_counter() is monotonic, so the elapsed time cannot be skewed by
    # wall-clock adjustments the way time.time() differences can.
    start_time = time.perf_counter()

    def _elapsed_ms() -> int:
        """Milliseconds elapsed since this call started."""
        return int((time.perf_counter() - start_time) * 1000)

    # Check availability
    if not is_sentiment_available():
        log_interaction(
            intent="sentiment_batch_analysis",
            tenant_id=tenant_id,
            success=False,
            error="Sentiment API not configured (missing HF_TOKEN)",
            # Only take len() of a real list: the input has not been validated
            # yet and an unsized object would raise TypeError here.
            batch_size=len(texts) if isinstance(texts, list) else 0
        )
        return {
            "results": [],
            "available": False,
            "total_analyzed": 0,
            "message": "Sentiment analysis is temporarily unavailable."
        }

    # Validate input (type first, so non-list objects are never truth-tested)
    if not isinstance(texts, list) or not texts:
        log_interaction(
            intent="sentiment_batch_analysis",
            tenant_id=tenant_id,
            success=False,
            error="Invalid texts input"
        )
        return {
            "results": [],
            "available": True,
            "total_analyzed": 0,
            "message": "Invalid batch input provided."
        }

    # Filter valid texts and limit batch size to 100
    valid_texts = [t for t in texts if isinstance(t, str) and t.strip()][:100]

    if not valid_texts:
        log_interaction(
            intent="sentiment_batch_analysis",
            tenant_id=tenant_id,
            success=False,
            error="No valid texts in batch"
        )
        return {
            "results": [],
            "available": True,
            "total_analyzed": 0,
            "message": "No valid texts provided for analysis."
        }

    try:
        # Prepare API request with batch input
        headers = {"Authorization": f"Bearer {HF_TOKEN}"}
        payload = {"inputs": valid_texts}

        # Call Hugging Face Inference API
        async with httpx.AsyncClient(timeout=60.0) as client:  # Longer timeout for batch
            response = await client.post(HF_API_URL, json=payload, headers=headers)

        response_time_ms = _elapsed_ms()

        if response.status_code != 200:
            log_interaction(
                intent="sentiment_batch_analysis",
                tenant_id=tenant_id,
                success=False,
                error=f"API returned status {response.status_code}",
                response_time_ms=response_time_ms,
                batch_size=len(valid_texts)
            )
            return {
                "results": [],
                "available": False,
                "total_analyzed": 0,
                "message": f"Sentiment API error: {response.status_code}",
                "response_time_ms": response_time_ms
            }

        results = response.json()

        # Process results and map labels
        label_mapping = {
            "LABEL_0": "NEGATIVE",
            "LABEL_1": "NEUTRAL",
            "LABEL_2": "POSITIVE"
        }

        def _score(entry: Dict[str, Any]) -> float:
            """Numeric score of a prediction entry (0.0 when missing)."""
            return float(entry.get('score', 0.0))

        processed_results = []
        if isinstance(results, list):
            for item in results:
                # Each item is expected to be the list of label scores for one
                # text; anything else is skipped.
                if not isinstance(item, list):
                    continue
                candidates = [
                    entry for entry in item
                    if isinstance(entry, dict) and 'label' in entry
                ]
                if not candidates:
                    continue
                # Select the highest-scoring entry explicitly instead of
                # relying on the API to return the entries sorted by score.
                top_result = max(candidates, key=_score)
                processed_results.append({
                    "label": label_mapping.get(top_result['label'], top_result['label']),
                    "score": _score(top_result)
                })

        log_interaction(
            intent="sentiment_batch_analysis",
            tenant_id=tenant_id,
            success=True,
            response_time_ms=response_time_ms,
            batch_size=len(valid_texts),
            total_analyzed=len(processed_results)
        )

        return {
            "results": processed_results,
            "available": True,
            "total_analyzed": len(processed_results),
            "response_time_ms": response_time_ms
        }

    except httpx.TimeoutException:
        response_time_ms = _elapsed_ms()
        log_interaction(
            intent="sentiment_batch_analysis",
            tenant_id=tenant_id,
            success=False,
            error="Batch sentiment analysis timed out",
            response_time_ms=response_time_ms,
            batch_size=len(valid_texts)
        )
        return {
            "results": [],
            "available": False,
            "total_analyzed": 0,
            "message": "Batch sentiment analysis timed out.",
            "error": "Request timeout",
            "response_time_ms": response_time_ms
        }

    except asyncio.CancelledError:
        # Mirror the single-text function: record the cancellation, then
        # re-raise so it is never converted into an error result.
        log_interaction(
            intent="sentiment_batch_analysis",
            tenant_id=tenant_id,
            success=False,
            error="Batch analysis cancelled",
            batch_size=len(valid_texts)
        )
        raise

    except Exception as e:
        # Boundary handler: any other failure (network error, invalid JSON,
        # malformed score, ...) is converted into an error result.
        response_time_ms = _elapsed_ms()

        log_interaction(
            intent="sentiment_batch_analysis",
            tenant_id=tenant_id,
            success=False,
            error=str(e),
            response_time_ms=response_time_ms,
            batch_size=len(valid_texts)
        )

        return {
            "results": [],
            "available": False,
            "total_analyzed": 0,
            "message": "An error occurred during batch sentiment analysis.",
            "error": str(e),
            "response_time_ms": response_time_ms
        }