# mlops-exam / app.py
# (Uploaded to the Hugging Face Hub via huggingface_hub, commit 85cf785.)
import json
import os
from typing import Dict, Any
import joblib
import pandas as pd
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
# FastAPI application instance; exposed at module level for the ASGI server.
app = FastAPI(
    title="Game of Thrones House Predictor",
    description="Predict which GoT house a character belongs to based on their traits",
    version="1.0.0",
)
# Load the trained model and its metadata from the bundled "model" directory
# (sits next to this file).
MODEL_DIR = os.path.join(os.path.dirname(__file__), "model")


def _load_json_entry(filename, key):
    """Read one metadata JSON file from MODEL_DIR and return its *key* entry."""
    with open(os.path.join(MODEL_DIR, filename), "r") as fh:
        return json.load(fh)[key]


model = joblib.load(os.path.join(MODEL_DIR, "model.joblib"))
feature_columns = _load_json_entry("feature_columns.json", "columns")
label_classes = _load_json_entry("label_classes.json", "classes")
# Input schema
class CharacterInput(BaseModel):
    """Request schema: a character's categorical traits and 1-5 scores."""

    # Categorical features (one-hot encoded during preprocessing).
    region: str
    primary_role: str
    alignment: str
    status: str
    species: str
    # Ordinal trait scores on a 1-5 scale.
    honour_1to5: int
    ruthlessness_1to5: int
    intelligence_1to5: int
    combat_skill_1to5: int
    diplomacy_1to5: int
    leadership_1to5: int
    # Binary traits (cast to 0/1 during preprocessing).
    trait_loyal: bool
    trait_scheming: bool

    # Pydantic v2 configuration: the class-based `Config` is deprecated in v2
    # (this file already uses the v2 API via `model_dump`), so declare
    # `model_config` instead. Same OpenAPI example, no behavior change.
    model_config = {
        "json_schema_extra": {
            "example": {
                "region": "The North",
                "primary_role": "Commander",
                "alignment": "Lawful Good",
                "status": "Alive",
                "species": "Human",
                "honour_1to5": 4,
                "ruthlessness_1to5": 2,
                "intelligence_1to5": 3,
                "combat_skill_1to5": 4,
                "diplomacy_1to5": 3,
                "leadership_1to5": 4,
                "trait_loyal": True,
                "trait_scheming": False,
            }
        }
    }
# Output schema
class PredictionOutput(BaseModel):
    """Response schema for /predict: the winning house plus the full distribution."""

    # House with the highest predicted probability.
    predicted_house: str
    # Probability of the predicted house (max over all classes), in [0, 1].
    confidence: float
    # Per-house probability, keyed by house name.
    all_probabilities: Dict[str, float]
def preprocess_input(input_data: CharacterInput) -> pd.DataFrame:
    """Transform a validated request into the one-hot encoded feature frame
    the model was trained on.

    Steps:
      1. Dump the pydantic model to a plain dict.
      2. Add trait/version columns present at training time but absent from
         the public input schema (defaulted to 0 / 1.0).
      3. Cast booleans to 0/1, one-hot encode the categoricals, and align
         columns (presence and order) with the saved ``feature_columns``.

    Returns:
        A single-row DataFrame whose columns exactly match ``feature_columns``.
    """
    data_dict = input_data.model_dump()

    # Columns the model saw at training but which are not part of the public
    # input schema; default them to "absent".
    data_dict.update(
        trait_strategic=0,
        trait_impulsive=0,
        trait_charismatic=0,
        trait_vengeful=0,
        feature_set_version=1.0,
    )

    # Booleans -> 0/1 so dtypes match the training frame.
    for key, value in data_dict.items():
        if isinstance(value, bool):
            data_dict[key] = int(value)

    df = pd.DataFrame([data_dict])

    # One-hot encode categoricals; dummy_na=True mirrors the training pipeline.
    categorical_cols = ["region", "primary_role", "alignment", "status", "species"]
    df_encoded = pd.get_dummies(df, columns=categorical_cols, dummy_na=True)

    # Align with the training feature set in one pass: reindex adds missing
    # columns as 0, drops dummies unseen at training, and fixes the order.
    # (Replaces the original per-column insertion loop, which fragments the
    # frame and triggers pandas PerformanceWarnings.)
    return df_encoded.reindex(columns=feature_columns, fill_value=0)
@app.get("/")
def root():
    """Root endpoint with API information."""
    endpoints = {
        "predict": "/predict",
        "docs": "/docs",
        "health": "/health",
    }
    return {
        "message": "Game of Thrones House Predictor API",
        "endpoints": endpoints,
    }
@app.get("/health")
def health_check():
    """Health check endpoint."""
    # Report whether the module-level model object was loaded at startup.
    model_loaded = model is not None
    return {"status": "healthy", "model_loaded": model_loaded}
@app.post("/predict", response_model=PredictionOutput)
def predict(character: CharacterInput):
    """
    Predict the Game of Thrones house for a character based on their traits.

    Returns the predicted house, the confidence (max class probability), and
    the full per-house probability distribution. Raises HTTP 500 with the
    underlying error message if preprocessing or inference fails.
    """
    try:
        # Preprocess input into the model's feature frame.
        X = preprocess_input(character)
        prediction = model.predict(X)[0]

        # Probabilities if available (e.g. Decision Tree has predict_proba).
        if hasattr(model, "predict_proba"):
            probabilities = model.predict_proba(X)[0]
            confidence = float(max(probabilities))
            # NOTE(review): assumes `label_classes` is in the same order as
            # the fitted model's `classes_` attribute — confirm both were
            # saved from the same estimator.
            all_probs = {
                house: float(p) for house, p in zip(label_classes, probabilities)
            }
        else:
            # No probability support: report full confidence in the one class.
            confidence = 1.0
            all_probs = {prediction: 1.0}

        return PredictionOutput(
            predicted_house=prediction,
            confidence=confidence,
            all_probabilities=all_probs,
        )
    except HTTPException:
        # Never wrap deliberately raised HTTP errors in a generic 500.
        raise
    except Exception as e:
        # Chain the cause so the original traceback is preserved in logs.
        raise HTTPException(status_code=500, detail=f"Prediction error: {e}") from e
if __name__ == "__main__":
    # Local development entry point; serve on all interfaces, port 8001.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8001)