Spaces:

zotthytt12
/

hr_classifier

Sleeping

File size: 5,575 Bytes

import subprocess
import sys
import os

# --- 1. EMERGENCY XGBOOST INSTALLATION ---
# This fragment must stay at the very top, right after the sys and subprocess imports.
# The xgboost name is not used directly in this file; per the comment at the
# model-loading call, joblib needs the package to be importable when it loads the model.
try:
    import xgboost
except ImportError:
    print("--- ⚠️ BRAK XGBOOST. Rozpoczynam awaryjną instalację... ---")
    # Install with pip under the same interpreter that runs this script;
    # check_call raises CalledProcessError if pip exits with a non-zero status.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "xgboost"])
    print("--- ✅ XGBoost zainstalowany pomyślnie! ---")
    # Retry the import now that the package should be available.
    import xgboost
# --------------------------------------

import joblib
import pandas as pd
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, Field
from typing import List
from huggingface_hub import hf_hub_download
from contextlib import asynccontextmanager

# --- Model configuration section ---
# Make sure the file name matches what is actually present under Files!
# Earlier logs mentioned 'model_raport.pkl', while the code now uses 'model.pkl'.
# 'model.pkl' is kept here, but double-check it!
MODEL_FILE_NAME = 'model.pkl'
MODEL_REPO_ID = 'zotthytt12/model_hr'

# Feature column names in a fixed order. They match, one for one, the
# names/aliases of the feature fields declared on CandidateFeatures.
# NOTE(review): presumably this is the column order the model was trained with — confirm.
MODEL_FEATURES_ORDER = [
    'Experience (Years)', 'Education', 'Certifications', 'Job Role',
    'Salary Expectation ($)', 'Projects Count', 'C++', 'Cybersecurity',
    'Deep Learning', 'Ethical Hacking', 'Java', 'Linux',
    'Machine Learning', 'NLP', 'Networking', 'Python', 'Pytorch',
    'React', 'SQL', 'TensorFlow'
]

# --- Global variable holding the model ---
# Stays None until the lifespan handler assigns the loaded model;
# the /rank endpoint answers HTTP 503 while it is still None.
model = None

# --- 2. Application lifecycle definition (Lifespan) ---
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load the model from the Hugging Face Hub at startup and log on shutdown.

    The download/unpickle step and the feature-name clean-up are handled
    separately, so a failure of the (optional) clean-up is not reported as a
    failure to load the model.

    Args:
        app: The FastAPI application this lifespan is attached to (unused).

    Yields:
        None. Control is handed back to FastAPI for the lifetime of the app.
        If loading failed, the global ``model`` stays ``None`` and the app
        still starts, so that ``/rank`` can answer with HTTP 503.
    """
    # Code executed at startup
    global model
    print("--- Rozpoczynanie ładowania modelu z Huba... ---")
    try:
        model_path = hf_hub_download(
            repo_id=MODEL_REPO_ID,
            filename=MODEL_FILE_NAME,
        )
        # joblib will use the xgboost package installed/imported at the top of the file.
        # NOTE(security): joblib.load unpickles the file and can therefore run
        # arbitrary code; only point MODEL_REPO_ID at a repository you trust.
        model = joblib.load(model_path)
    except Exception as e:
        print(f"BŁĄD KRYTYCZNY: Nie można wczytać modelu z Huba ({MODEL_REPO_ID}). Błąd: {e}")
        # Startup is deliberately not aborted, so the app comes up and reports
        # the problem through HTTP 503.
    else:
        print(f"--- Pomyślnie pobrano i wczytano model z Huba: {MODEL_REPO_ID} ---")

        # 🧹 Column-name repair: strip leading and trailing whitespace.
        if hasattr(model, "feature_names_in_"):
            try:
                clean_names = [f.strip() for f in model.feature_names_in_]
                # Some estimators expose feature_names_in_ as a read-only
                # property (e.g. XGBoost's scikit-learn wrapper); assigning to
                # it then raises AttributeError, which must not be mistaken
                # for a model-loading failure.
                model.feature_names_in_ = clean_names
                print("🧹 Oczyszczone feature_names_in_:", model.feature_names_in_)
            except Exception as e:
                # Best effort: the model itself is loaded and remains usable.
                print(f"OSTRZEŻENIE: Nie udało się oczyścić feature_names_in_. Błąd: {e}")

    yield
    print("--- Zamykanie aplikacji ---")

# --- 3. API definition ---
# The lifespan handler is passed in here, which ties the model loading
# defined in lifespan() to this application instance.
app = FastAPI(
    title="API Rankingu CV",
    description="API oceniania kandydatów (XGBoost/RandomForest)",
    lifespan=lifespan
)

# --- 4. Data models (Pydantic) ---
class CandidateFeatures(BaseModel):
    """Feature vector describing a single candidate.

    ``identifier`` is an opaque candidate ID; every other field is a float
    feature. Fields whose column name is not a valid Python identifier
    (spaces, parentheses, ``+``, ``$``) carry an alias holding the exact
    column name; dumping with ``by_alias=True`` yields those column names.

    NOTE(review): categorical-looking fields such as ``Education`` and
    ``Job Role`` are typed as floats, so they are presumably expected to be
    numerically encoded by the caller — confirm against the training pipeline.
    """

    identifier: str = Field(..., description="ID kandydata")
    Experience_Years: float = Field(..., alias="Experience (Years)")
    Education: float
    Certifications: float
    Job_Role: float = Field(..., alias="Job Role")
    Salary_Expectation: float = Field(..., alias="Salary Expectation ($)")
    Projects_Count: float = Field(..., alias="Projects Count")
    Cpp: float = Field(..., alias="C++")
    Cybersecurity: float
    Deep_Learning: float = Field(..., alias="Deep Learning")
    Ethical_Hacking: float = Field(..., alias="Ethical Hacking")
    Java: float
    Linux: float
    Machine_Learning: float = Field(..., alias="Machine Learning")
    NLP: float
    Networking: float
    Python: float
    Pytorch: float
    React: float
    SQL: float
    TensorFlow: float

    # Pydantic v2 style configuration (the file already relies on v2 via
    # model_dump); the class-based ``Config`` is deprecated there and emits a
    # deprecation warning. populate_by_name lets clients send either the alias
    # or the Python field name.
    model_config = {"populate_by_name": True}

class RankingRequest(BaseModel):
    """Request body of the /rank endpoint: the candidates to be scored."""
    candidates: List[CandidateFeatures]

class RankedCandidate(BaseModel):
    """One scored candidate: its identifier together with the score assigned to it."""
    identifier: str
    score: float

class RankingResponse(BaseModel):
    """Response body of the /rank endpoint: the list of scored candidates."""
    ranked_candidates: List[RankedCandidate]

# --- 5. API endpoints ---

@app.get("/")
def read_root():
    """Return a static status payload confirming that the API is up."""
    payload = dict(status="OK", message="API działa poprawnie")
    return payload

@app.post("/rank", response_model=RankingResponse)
def rank_candidates(request: RankingRequest):
    """Score every candidate with the loaded model and return them best-first.

    Args:
        request: The candidates to rank, each carrying an identifier and its
            feature values.

    Returns:
        A dict with the key ``ranked_candidates`` holding ``RankedCandidate``
        objects sorted by descending score. The score is the value in column
        index 1 of the model's ``predict_proba`` output. An empty candidate
        list yields an empty ranking.

    Raises:
        HTTPException: 503 when the model has not been loaded; 500 when
            anything fails while preparing the data or predicting.
    """
    # The global is only read here, so no ``global`` declaration is needed.
    if model is None:
        raise HTTPException(status_code=503, detail="Model nie jest gotowy. Sprawdź logi aplikacji.")

    if not request.candidates:
        return {"ranked_candidates": []}

    try:
        # Data conversion: dump by alias so the keys are the model's column names.
        candidate_data_list = [c.model_dump(by_alias=True) for c in request.candidates]
        identifiers = [c['identifier'] for c in candidate_data_list]

        # DataFrame holding only the feature columns.
        features_df = pd.DataFrame(candidate_data_list).drop(columns=['identifier'])

        # Prefer the column names recorded on the model; a model without
        # feature_names_in_ falls back to the configured column order
        # instead of failing with an AttributeError.
        feature_names = getattr(model, "feature_names_in_", None)
        if feature_names is None:
            feature_names = MODEL_FEATURES_ORDER

        # reindex() fills columns absent from the request with 0; make that
        # visible in the logs, since a column-name mismatch would otherwise
        # silently zero out features.
        present = set(features_df.columns)
        missing = [name for name in feature_names if name not in present]
        if missing:
            print(f"OSTRZEŻENIE: Brak kolumn w danych wejściowych, uzupełniono zerami: {missing}")

        # Align the columns with what the model expects.
        features_df_ordered = features_df.reindex(columns=feature_names, fill_value=0)

        # Prediction: keep the value at column index 1 of predict_proba.
        probabilities = model.predict_proba(features_df_ordered)[:, 1]

        # Result: pair each identifier with its score.
        ranked_list = [
            RankedCandidate(identifier=identifier, score=float(probability))
            for identifier, probability in zip(identifiers, probabilities)
        ]

        # Sorting: highest score first (stable, so ties keep request order).
        sorted_ranked_list = sorted(ranked_list, key=lambda x: x.score, reverse=True)
        return {"ranked_candidates": sorted_ranked_list}

    except Exception as e:
        print(f"Błąd podczas predykcji: {e}")
        # Chain the original exception so the traceback keeps the root cause.
        raise HTTPException(status_code=500, detail=f"Błąd serwera: {str(e)}") from e

# Local run
if __name__ == "__main__":
    # uvicorn is imported here, so it is only required when the file is run as a script.
    import uvicorn
    # Serve the app on all network interfaces, port 8000.
    uvicorn.run(app, host="0.0.0.0", port=8000)