gopalaKrishna1236 committed on
Commit
fd438c4
·
verified ·
1 Parent(s): 4e4dfcf

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +92 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from fastapi import FastAPI, HTTPException
3
+ from pydantic import BaseModel
4
+ from typing import List, Dict, Any
5
+ import re
6
+ from collections import Counter
7
+ import nltk
8
+ from nltk.corpus import stopwords
9
+ from nltk.tokenize import word_tokenize
10
+ from nltk.stem import WordNetLemmatizer
11
+ from nltk.sentiment.vader import SentimentIntensityAnalyzer
12
+
13
+ # Ensure needed NLTK data is present in your runtime/environment
14
+ # nltk.download('punkt'); nltk.download('stopwords'); nltk.download('wordnet'); nltk.download('vader_lexicon')
15
+
16
app = FastAPI(title="Insurance Claim Text Analytics API")


def _ensure_nltk_data() -> None:
    """Download the NLTK resources this app needs, if they are absent.

    The original code only carried a comment telling the operator to run
    nltk.download() manually; without the data, importing this module
    raises LookupError when stopwords/WordNet/VADER are first touched.
    This check is idempotent: nltk.data.find() is cheap when the resource
    is already installed, and download(quiet=True) fills the gap otherwise.
    """
    required = [
        ("punkt", "tokenizers/punkt"),            # word_tokenize model
        ("stopwords", "corpora/stopwords"),       # English stopword list
        ("wordnet", "corpora/wordnet"),           # WordNetLemmatizer data
        ("vader_lexicon", "sentiment/vader_lexicon"),  # VADER sentiment lexicon
    ]
    for resource, data_path in required:
        try:
            nltk.data.find(data_path)
        except LookupError:
            nltk.download(resource, quiet=True)


_ensure_nltk_data()

# Shared, read-only NLP resources built once at import time.
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()
sia = SentimentIntensityAnalyzer()
22
# Claim categories and the (lemmatized) keyword tokens that trigger them.
# Stored category-first for readability, then inverted into the flat
# token -> category lookup table the /predict endpoint uses.
_CATEGORY_KEYWORDS = {
    'Accident': ('accident', 'collision', 'crash', 'car'),
    'Damage': ('damage', 'fire', 'water'),
    'Theft': ('theft', 'stolen', 'laptop', 'bike'),
    'Vandalism': ('vandal',),
    'Natural Disaster': ('flood', 'storm'),
    'Injury': ('injury',),
    'Mechanical': ('breakdown', 'engine'),
}

category_map = {
    keyword: category
    for category, keywords in _CATEGORY_KEYWORDS.items()
    for keyword in keywords
}
41
+
42
class PredictRequest(BaseModel):
    """Request body for POST /predict."""

    # Raw claim text to analyze; rejected with HTTP 400 when empty/blank.
    text: str
    # Number of top keywords to return (passed to Counter.most_common).
    top_k: int = 10
45
+
46
class PredictResponse(BaseModel):
    """Response body for POST /predict."""

    # The original input text, echoed back.
    text: str
    # List of {"keyword": str, "count": int}, most frequent first.
    keywords: List[Dict[str, Any]]
    # Claim categories whose keywords appeared in the text.
    categories: List[str]
    # VADER scores: "neg"/"neu"/"pos"/"compound" plus a "label" string.
    sentiment: Dict[str, Any]
51
+
52
def clean_text(text: str) -> str:
    """Normalize raw claim text to lowercase letters and single spaces.

    Strips URLs, e-mail addresses, digit runs, and punctuation, then
    collapses whitespace. Non-string input is coerced via str().
    """
    normalized = str(text).lower()
    # Order matters: URLs and e-mail addresses are removed whole before
    # the generic non-letter sweep would shred them into letter fragments.
    for pattern, replacement in (
        (r'http\S+|www\S+|https\S+', ''),  # URLs
        (r'\S+@\S+', ''),                  # e-mail addresses
        (r'\d+', ' '),                     # digit runs
        (r'[^a-z\s]', ' '),                # anything that is not a letter/space
    ):
        normalized = re.sub(pattern, replacement, normalized)
    return re.sub(r'\s+', ' ', normalized).strip()
60
+
61
def tokenize(text: str):
    """Turn raw text into a list of cleaned, lemmatized word tokens.

    Pipeline: clean_text -> word_tokenize -> drop stopwords and tokens
    of length <= 2 -> WordNet-lemmatize the survivors.
    """
    return [
        lemmatizer.lemmatize(token)
        for token in word_tokenize(clean_text(text))
        if token not in stop_words and len(token) > 2
    ]
67
+
68
@app.post("/predict", response_model=PredictResponse)
def predict(req: PredictRequest):
    """Analyze one insurance-claim text.

    Returns the top-k keyword frequencies, the matched claim categories,
    and VADER sentiment scores with a positive/negative/neutral label.

    Raises:
        HTTPException(400): when the input text is empty or whitespace-only.
    """
    if not req.text or not req.text.strip():
        raise HTTPException(status_code=400, detail="Empty text")

    tokens = tokenize(req.text)
    freq = Counter(tokens)
    # Counter.most_common(n) returns [] for n <= 0, so a non-positive
    # top_k simply yields no keywords rather than an error.
    topk = freq.most_common(req.top_k)

    # Exact lemmatized-token -> category lookup.
    # NOTE(review): the 'vandal' key only matches the literal token
    # 'vandal', never 'vandalism'/'vandalized' — confirm whether prefix
    # matching was intended before changing this.
    cats = {category_map[t] for t in set(tokens) if t in category_map}

    # Sentiment is scored on the raw (uncleaned) text: VADER uses casing
    # and punctuation as intensity cues.
    scores = sia.polarity_scores(req.text)
    comp = scores['compound']
    # Standard VADER compound-score thresholds (+/- 0.05).
    if comp >= 0.05:
        label = 'positive'
    elif comp <= -0.05:
        label = 'negative'
    else:
        label = 'neutral'

    return {
        "text": req.text,
        "keywords": [{"keyword": k, "count": c} for k, c in topk],
        # sorted() makes the category order deterministic across runs;
        # list(set) order varies with Python's hash randomization.
        "categories": sorted(cats),
        "sentiment": {"neg": scores['neg'], "neu": scores['neu'], "pos": scores['pos'], "compound": comp, "label": label},
    }
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn[standard]
3
+ nltk
4
+ pandas
5
+ python-docx