import json
import os
from typing import Dict, Any

import joblib
import pandas as pd
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

# Initialize FastAPI app
app = FastAPI(
    title="Game of Thrones House Predictor",
    description="Predict which GoT house a character belongs to based on their traits",
    version="1.0.0",
)

# Load model and metadata once at import time. Paths are resolved relative to
# this file so the service works regardless of the current working directory.
MODEL_DIR = os.path.join(os.path.dirname(__file__), "model")
model = joblib.load(os.path.join(MODEL_DIR, "model.joblib"))

with open(os.path.join(MODEL_DIR, "feature_columns.json"), "r") as f:
    feature_columns = json.load(f)["columns"]

with open(os.path.join(MODEL_DIR, "label_classes.json"), "r") as f:
    label_classes = json.load(f)["classes"]


class CharacterInput(BaseModel):
    """Request schema: raw character traits before one-hot encoding."""

    region: str
    primary_role: str
    alignment: str
    status: str
    species: str
    honour_1to5: int
    ruthlessness_1to5: int
    intelligence_1to5: int
    combat_skill_1to5: int
    diplomacy_1to5: int
    leadership_1to5: int
    trait_loyal: bool
    trait_scheming: bool

    # Pydantic v2 configuration. The file already uses v2's model_dump(),
    # so the deprecated v1-style `class Config` is replaced by model_config.
    model_config = {
        "json_schema_extra": {
            "example": {
                "region": "The North",
                "primary_role": "Commander",
                "alignment": "Lawful Good",
                "status": "Alive",
                "species": "Human",
                "honour_1to5": 4,
                "ruthlessness_1to5": 2,
                "intelligence_1to5": 3,
                "combat_skill_1to5": 4,
                "diplomacy_1to5": 3,
                "leadership_1to5": 4,
                "trait_loyal": True,
                "trait_scheming": False,
            }
        }
    }


class PredictionOutput(BaseModel):
    """Response schema: winning house plus the full probability distribution."""

    predicted_house: str
    confidence: float
    all_probabilities: Dict[str, float]


def preprocess_input(input_data: CharacterInput) -> pd.DataFrame:
    """Transform input JSON to match the one-hot encoded features from training.

    Returns a single-row DataFrame whose columns exactly match
    ``feature_columns`` (missing columns filled with 0, extras dropped).
    """
    data_dict = input_data.model_dump()

    # Traits the model was trained with but the public API does not expose:
    # default them to 0 (trait absent).
    data_dict["trait_strategic"] = 0
    data_dict["trait_impulsive"] = 0
    data_dict["trait_charismatic"] = 0
    data_dict["trait_vengeful"] = 0
    data_dict["feature_set_version"] = 1.0

    # Booleans -> ints so the model sees numeric features.
    for key in data_dict:
        if isinstance(data_dict[key], bool):
            data_dict[key] = int(data_dict[key])

    # Single-row frame for the one incoming character.
    df = pd.DataFrame([data_dict])

    # One-hot encode categorical columns (dummy_na mirrors training).
    categorical_cols = ["region", "primary_role", "alignment", "status", "species"]
    df_encoded = pd.get_dummies(df, columns=categorical_cols, dummy_na=True)

    # Align with the training features in a single pass: reindex adds missing
    # columns (filled with 0), drops columns the model never saw, and fixes
    # the column order. This replaces the original per-column insertion loop,
    # which fragments the DataFrame.
    return df_encoded.reindex(columns=feature_columns, fill_value=0)


@app.get("/")
def root():
    """Root endpoint with API information."""
    return {
        "message": "Game of Thrones House Predictor API",
        "endpoints": {
            "predict": "/predict",
            "docs": "/docs",
            "health": "/health",
        },
    }


@app.get("/health")
def health_check():
    """Health check endpoint."""
    return {"status": "healthy", "model_loaded": model is not None}


@app.post("/predict", response_model=PredictionOutput)
def predict(character: CharacterInput):
    """
    Predict the Game of Thrones house for a character based on their traits.
    """
    try:
        # Preprocess input into the model's expected feature matrix.
        X = preprocess_input(character)

        # model.predict returns a numpy scalar (e.g. numpy.str_); cast to a
        # plain str so it satisfies the str-typed response model and is a
        # clean dict key below.
        prediction = str(model.predict(X)[0])

        # Get probabilities if available (Decision Tree has predict_proba).
        if hasattr(model, "predict_proba"):
            probabilities = model.predict_proba(X)[0]
            confidence = float(max(probabilities))
            # Map every known class label to its probability.
            all_probs = {
                str(label_classes[i]): float(probabilities[i])
                for i in range(len(label_classes))
            }
        else:
            confidence = 1.0
            all_probs = {prediction: 1.0}

        return PredictionOutput(
            predicted_house=prediction,
            confidence=confidence,
            all_probabilities=all_probs,
        )
    except Exception as e:
        # Boundary handler: surface any preprocessing/model failure as a 500,
        # chaining the original exception for server-side tracebacks.
        raise HTTPException(status_code=500, detail=f"Prediction error: {str(e)}") from e


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8001)