gopalaKrishna1236 committed on
Commit
fd438c4
·
verified ·
1 Parent(s): 4e4dfcf

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +92 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from fastapi import FastAPI, HTTPException
3
+ from pydantic import BaseModel
4
+ from typing import List, Dict, Any
5
+ import re
6
+ from collections import Counter
7
+ import nltk
8
+ from nltk.corpus import stopwords
9
+ from nltk.tokenize import word_tokenize
10
+ from nltk.stem import WordNetLemmatizer
11
+ from nltk.sentiment.vader import SentimentIntensityAnalyzer
12
+
13
+ # Ensure needed NLTK data is present in your runtime/environment
14
+ # nltk.download('punkt'); nltk.download('stopwords'); nltk.download('wordnet'); nltk.download('vader_lexicon')
15
+
16
app = FastAPI(title="Insurance Claim Text Analytics API")


def _ensure_nltk_data() -> None:
    """Download the NLTK resources this app needs, if they are absent.

    The original code only carried a comment telling the operator to run
    nltk.download() manually; without the data, importing this module
    raises LookupError when stopwords/WordNet/VADER are first touched.
    This check is idempotent: nltk.data.find() is cheap when the resource
    is already installed, and download(quiet=True) fills the gap otherwise.
    """
    required = [
        ("punkt", "tokenizers/punkt"),            # word_tokenize model
        ("stopwords", "corpora/stopwords"),       # English stopword list
        ("wordnet", "corpora/wordnet"),           # WordNetLemmatizer data
        ("vader_lexicon", "sentiment/vader_lexicon"),  # VADER sentiment lexicon
    ]
    for resource, data_path in required:
        try:
            nltk.data.find(data_path)
        except LookupError:
            nltk.download(resource, quiet=True)


_ensure_nltk_data()

# Shared, read-only NLP resources built once at import time.
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()
sia = SentimentIntensityAnalyzer()
22
# Claim categories and the (lemmatized) keyword tokens that trigger them.
# Stored category-first for readability, then inverted into the flat
# token -> category lookup table the /predict endpoint uses.
_CATEGORY_KEYWORDS = {
    'Accident': ('accident', 'collision', 'crash', 'car'),
    'Damage': ('damage', 'fire', 'water'),
    'Theft': ('theft', 'stolen', 'laptop', 'bike'),
    'Vandalism': ('vandal',),
    'Natural Disaster': ('flood', 'storm'),
    'Injury': ('injury',),
    'Mechanical': ('breakdown', 'engine'),
}

category_map = {
    keyword: category
    for category, keywords in _CATEGORY_KEYWORDS.items()
    for keyword in keywords
}
41
+
42
class PredictRequest(BaseModel):
    """Request body for POST /predict."""

    # Raw claim text to analyze; rejected with HTTP 400 when empty/blank.
    text: str
    # Number of top keywords to return (passed to Counter.most_common).
    top_k: int = 10
45
+
46
class PredictResponse(BaseModel):
    """Response body for POST /predict."""

    # The original input text, echoed back.
    text: str
    # List of {"keyword": str, "count": int}, most frequent first.
    keywords: List[Dict[str, Any]]
    # Claim categories whose keywords appeared in the text.
    categories: List[str]
    # VADER scores: "neg"/"neu"/"pos"/"compound" plus a "label" string.
    sentiment: Dict[str, Any]
51
+
52
def clean_text(text: str) -> str:
    """Normalize raw claim text to lowercase letters and single spaces.

    Strips URLs, e-mail addresses, digit runs, and punctuation, then
    collapses whitespace. Non-string input is coerced via str().
    """
    normalized = str(text).lower()
    # Order matters: URLs and e-mail addresses are removed whole before
    # the generic non-letter sweep would shred them into letter fragments.
    for pattern, replacement in (
        (r'http\S+|www\S+|https\S+', ''),  # URLs
        (r'\S+@\S+', ''),                  # e-mail addresses
        (r'\d+', ' '),                     # digit runs
        (r'[^a-z\s]', ' '),                # anything that is not a letter/space
    ):
        normalized = re.sub(pattern, replacement, normalized)
    return re.sub(r'\s+', ' ', normalized).strip()
60
+
61
def tokenize(text: str):
    """Turn raw text into a list of cleaned, lemmatized word tokens.

    Pipeline: clean_text -> word_tokenize -> drop stopwords and tokens
    of length <= 2 -> WordNet-lemmatize the survivors.
    """
    return [
        lemmatizer.lemmatize(token)
        for token in word_tokenize(clean_text(text))
        if token not in stop_words and len(token) > 2
    ]
67
+
68
@app.post("/predict", response_model=PredictResponse)
def predict(req: PredictRequest):
    """Analyze one insurance-claim text.

    Returns the top-k keyword frequencies, the matched claim categories,
    and VADER sentiment scores with a positive/negative/neutral label.

    Raises:
        HTTPException(400): when the input text is empty or whitespace-only.
    """
    if not req.text or not req.text.strip():
        raise HTTPException(status_code=400, detail="Empty text")

    tokens = tokenize(req.text)
    freq = Counter(tokens)
    # Counter.most_common(n) returns [] for n <= 0, so a non-positive
    # top_k simply yields no keywords rather than an error.
    topk = freq.most_common(req.top_k)

    # Exact lemmatized-token -> category lookup.
    # NOTE(review): the 'vandal' key only matches the literal token
    # 'vandal', never 'vandalism'/'vandalized' — confirm whether prefix
    # matching was intended before changing this.
    cats = {category_map[t] for t in set(tokens) if t in category_map}

    # Sentiment is scored on the raw (uncleaned) text: VADER uses casing
    # and punctuation as intensity cues.
    scores = sia.polarity_scores(req.text)
    comp = scores['compound']
    # Standard VADER compound-score thresholds (+/- 0.05).
    if comp >= 0.05:
        label = 'positive'
    elif comp <= -0.05:
        label = 'negative'
    else:
        label = 'neutral'

    return {
        "text": req.text,
        "keywords": [{"keyword": k, "count": c} for k, c in topk],
        # sorted() makes the category order deterministic across runs;
        # list(set) order varies with Python's hash randomization.
        "categories": sorted(cats),
        "sentiment": {"neg": scores['neg'], "neu": scores['neu'], "pos": scores['pos'], "compound": comp, "label": label},
    }
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn[standard]
3
+ nltk
4
+ pandas
5
+ python-docx