File size: 2,767 Bytes
4a74ef7 c7b12e4 4a74ef7 c7b12e4 0e73558 c7b12e4 bd91351 c7b12e4 4a74ef7 f733c1b 5d62fad 6f92079 f733c1b 4a74ef7 f733c1b 6f92079 2525fee f733c1b 4a74ef7 c7b12e4 4a74ef7 5d62fad 0e73558 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
from flask import Flask, request, jsonify
import torch
import os
from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification
# Flask application object; the /predict route registers itself on it.
app = Flask(__name__)

# Hub repository that hosts the classifier, plus an optional access token
# (only needed when the repository is private).
REPO = "aaronmrls/distilBERT-maintenance-priority-scorer"
HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")

print("Loading tokenizer and model from Hugging Face Hub...")
if HF_TOKEN:
    print("Using Hugging Face token:", "Yes")
else:
    print("Using Hugging Face token:", "No")

# Load the tokenizer and model once at import time so that every request
# handled by this process reuses the same instances.
tokenizer = DistilBertTokenizerFast.from_pretrained(
    REPO,
    use_auth_token=HF_TOKEN,
)
model = DistilBertForSequenceClassification.from_pretrained(
    REPO,
    use_auth_token=HF_TOKEN,
)
# Switch the model to evaluation mode; it is only used for inference here.
model.eval()
# Keyword rules that short-circuit the model. Matching is a plain substring
# test against the lower-cased ticket text, so every entry must be lower-case.
_CRITICAL_KEYWORDS = (
    "umuusok", "sunog", "amoy sunog", "spark", "kuryente",
    "leak", "baha", "gas", "short circuit", "smoke",
)
_LOW_KEYWORDS = (
    "lightbulb", "bumbilya", "ilaw", "palitan", "replace bulb",
    "painting", "door knob", "hinge", "minor", "cosmetic",
)

# Fields every ticket object must provide ("comments" is optional).
_REQUIRED_FIELDS = ("category", "subcategory", "area")

# Range the model alone is allowed to produce. A score of 5 is only ever
# assigned by the critical-keyword rule, and 1 is always lifted to 2.
_MODEL_MIN_SCORE = 2
_MODEL_MAX_SCORE = 4


def _build_input_text(item):
    """Return the text used by both the keyword rules and the model.

    Args:
        item: One decoded element of the request payload.

    Returns:
        The string "<category> - <subcategory> in <area>. <comments>".

    Raises:
        ValueError: If *item* is not a JSON object or lacks a required field.
    """
    if not isinstance(item, dict):
        raise ValueError("each item must be a JSON object")
    missing = [field for field in _REQUIRED_FIELDS if field not in item]
    if missing:
        raise ValueError(f"missing required field(s): {', '.join(missing)}")
    # An explicit JSON null must not end up as the literal text "None".
    comments = item.get("comments")
    if comments is None:
        comments = ""
    return f"{item['category']} - {item['subcategory']} in {item['area']}. {comments}"


def _keyword_priority(text_lower):
    """Return 5 or 2 if a keyword rule matches *text_lower*, otherwise None.

    Critical keywords are checked first, so they win when both lists match.
    """
    if any(word in text_lower for word in _CRITICAL_KEYWORDS):
        return 5
    if any(word in text_lower for word in _LOW_KEYWORDS):
        return 2
    return None


def _model_priority(input_text):
    """Classify *input_text* with the model and return a score clamped to 2-4."""
    inputs = tokenizer(input_text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    predicted_class = torch.argmax(outputs.logits, dim=1).item()
    # Class indices are 0-based; shift them onto the 1-5 priority scale.
    priority_score = predicted_class + 1
    # This path is only reached when no critical keyword matched, so a
    # predicted 5 is always reduced to 4 and a predicted 1 is lifted to 2.
    return max(_MODEL_MIN_SCORE, min(_MODEL_MAX_SCORE, priority_score))


@app.route("/predict", methods=["POST"])
def predict():
    """Score a batch of maintenance tickets.

    Expects a JSON array of objects with "category", "subcategory" and
    "area" fields and an optional "comments" field.

    Returns:
        200 with a JSON array of {"priority_score": <int>} objects, one per
        input item and in the same order; 400 with {"error": <message>} when
        the payload is not a JSON array of well-formed objects; 500 with
        {"error": <message>} when scoring fails unexpectedly.
    """
    # silent=True yields None for a missing or malformed JSON body instead
    # of raising, so bad requests are reported as 400 rather than 500.
    data = request.get_json(silent=True)
    if not isinstance(data, list):
        return jsonify({"error": "request body must be a JSON array of ticket objects"}), 400

    # Validate the whole batch up front so client errors are never confused
    # with failures inside the tokenizer or the model.
    try:
        texts = [_build_input_text(item) for item in data]
    except ValueError as exc:
        return jsonify({"error": str(exc)}), 400

    try:
        results = []
        for input_text in texts:
            priority_score = _keyword_priority(input_text.lower())
            if priority_score is None:
                priority_score = _model_priority(input_text)
            results.append({"priority_score": priority_score})
        return jsonify(results)
    except Exception as e:
        # Top-level boundary: record the traceback, then report the failure.
        app.logger.exception("Prediction failed")
        return jsonify({"error": str(e)}), 500
# Start Flask's built-in server only when this file is executed directly;
# it listens on all network interfaces on port 7860.
if __name__ == "__main__":
    app.run(port=7860, host="0.0.0.0")
|