Upload 2 files
Browse files- app.py +92 -0
- requirements.txt +5 -0
app.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
from fastapi import FastAPI, HTTPException
|
| 3 |
+
from pydantic import BaseModel
|
| 4 |
+
from typing import List, Dict, Any
|
| 5 |
+
import re
|
| 6 |
+
from collections import Counter
|
| 7 |
+
import nltk
|
| 8 |
+
from nltk.corpus import stopwords
|
| 9 |
+
from nltk.tokenize import word_tokenize
|
| 10 |
+
from nltk.stem import WordNetLemmatizer
|
| 11 |
+
from nltk.sentiment.vader import SentimentIntensityAnalyzer
|
| 12 |
+
|
| 13 |
+
# NLTK corpora must be present in the runtime; download once per environment:
# nltk.download('punkt'); nltk.download('stopwords'); nltk.download('wordnet'); nltk.download('vader_lexicon')

app = FastAPI(title="Insurance Claim Text Analytics API")

# Shared NLP resources, built once at import time and reused per request.
sia = SentimentIntensityAnalyzer()
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))
|
| 21 |
+
|
| 22 |
+
# Keyword -> claim-category lookup used by /predict to tag claims.
# Kept as an ordered pair sequence so the mapping reads like a table.
_KEYWORD_CATEGORY_PAIRS = (
    ('accident', 'Accident'),
    ('collision', 'Accident'),
    ('crash', 'Accident'),
    ('damage', 'Damage'),
    ('fire', 'Damage'),
    ('theft', 'Theft'),
    ('stolen', 'Theft'),
    ('vandal', 'Vandalism'),
    ('flood', 'Natural Disaster'),
    ('storm', 'Natural Disaster'),
    ('injury', 'Injury'),
    ('breakdown', 'Mechanical'),
    ('engine', 'Mechanical'),
    ('water', 'Damage'),
    ('laptop', 'Theft'),
    ('bike', 'Theft'),
    ('car', 'Accident'),
)
category_map = dict(_KEYWORD_CATEGORY_PAIRS)
|
| 41 |
+
|
| 42 |
+
class PredictRequest(BaseModel):
    """Request body for POST /predict."""
    # Raw claim text to analyse; blank/whitespace-only text is rejected
    # with HTTP 400 by the endpoint.
    text: str
    # How many of the most frequent keywords to return
    # (passed to Counter.most_common).
    top_k: int = 10
|
| 45 |
+
|
| 46 |
+
class PredictResponse(BaseModel):
    """Response body for POST /predict."""
    # Echo of the submitted text.
    text: str
    # Frequency-ordered entries of the form {"keyword": str, "count": int}.
    keywords: List[Dict[str, Any]]
    # Distinct claim categories matched via category_map.
    categories: List[str]
    # VADER scores ("neg", "neu", "pos", "compound") plus a coarse "label".
    sentiment: Dict[str, Any]
|
| 51 |
+
|
| 52 |
+
def clean_text(text: str) -> str:
    """Normalise raw claim text to lowercase alphabetic words.

    URLs and e-mail addresses are deleted outright; digits and any
    remaining non-letter characters become spaces; runs of whitespace
    are then collapsed to single spaces.
    """
    lowered = str(text).lower()
    # Remove URLs and e-mail addresses while their punctuation is intact.
    for pattern in (r'http\S+|www\S+|https\S+', r'\S+@\S+'):
        lowered = re.sub(pattern, '', lowered)
    # Turn digits and leftover punctuation into spaces so words split apart.
    for pattern in (r'\d+', r'[^a-z\s]'):
        lowered = re.sub(pattern, ' ', lowered)
    return re.sub(r'\s+', ' ', lowered).strip()
|
| 60 |
+
|
| 61 |
+
def tokenize(text: str):
    """Clean, tokenize, filter and lemmatize *text*.

    Stopwords and tokens of length <= 2 are discarded before
    lemmatization; the surviving lemmas are returned in order.
    """
    words = word_tokenize(clean_text(text))
    return [
        lemmatizer.lemmatize(w)
        for w in words
        if w not in stop_words and len(w) > 2
    ]
|
| 67 |
+
|
| 68 |
+
def _sentiment_label(compound: float) -> str:
    """Map a VADER compound score to a coarse label.

    Uses the conventional VADER cut-offs of +/-0.05.
    """
    if compound >= 0.05:
        return 'positive'
    if compound <= -0.05:
        return 'negative'
    return 'neutral'

@app.post("/predict", response_model=PredictResponse)
def predict(req: PredictRequest):
    """Analyse one claim description.

    Returns the top-k keyword frequencies, the claim categories whose
    trigger keywords appear in the text, and VADER sentiment scores.

    Raises:
        HTTPException: 400 when the text is empty or whitespace-only.
    """
    if not req.text or not req.text.strip():
        raise HTTPException(status_code=400, detail="Empty text")

    tokens = tokenize(req.text)
    topk = Counter(tokens).most_common(req.top_k)

    # Deduplicated categories for every known keyword present in the text.
    cats = {category_map[t] for t in set(tokens) if t in category_map}

    # Sentiment is scored on the raw text so VADER can use punctuation,
    # casing and emphasis cues that cleaning/tokenization would discard.
    scores = sia.polarity_scores(req.text)
    comp = scores['compound']
    return {
        "text": req.text,
        "keywords": [{"keyword": k, "count": c} for k, c in topk],
        "categories": list(cats),
        "sentiment": {
            "neg": scores['neg'],
            "neu": scores['neu'],
            "pos": scores['pos'],
            "compound": comp,
            "label": _sentiment_label(comp),
        },
    }
|
requirements.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi
|
| 2 |
+
uvicorn[standard]
|
| 3 |
+
nltk
|
| 4 |
+
pandas
|
| 5 |
+
python-docx
|