Spaces:
Sleeping
Sleeping
| import subprocess | |
| import sys | |
| import os | |
# --- 1. EMERGENCY XGBOOST INSTALL ---
# This fragment has to stay at the very top of the module, directly after the
# `sys` and `subprocess` imports it relies on.
try:
    import xgboost  # noqa: F401
except ImportError:
    # Install into the interpreter that is running this module, then retry.
    pip_install_cmd = [sys.executable, "-m", "pip", "install", "xgboost"]
    print("--- ⚠️ BRAK XGBOOST. Rozpoczynam awaryjną instalację... ---")
    subprocess.check_call(pip_install_cmd)
    print("--- ✅ XGBoost zainstalowany pomyślnie! ---")
    import xgboost  # noqa: F401
# --------------------------------------
| import joblib | |
| import pandas as pd | |
| from fastapi import FastAPI, HTTPException | |
| from pydantic import BaseModel, Field | |
| from typing import List | |
| from huggingface_hub import hf_hub_download | |
| from contextlib import asynccontextmanager | |
# --- Model configuration ---
# The file name must match the artifact listed under "Files" in the Hub repo.
# Earlier logs referenced 'model_raport.pkl' while the code uses 'model.pkl';
# 'model.pkl' is kept here, but verify it against the repo.
MODEL_REPO_ID = 'zotthytt12/model_hr'
MODEL_FILE_NAME = 'model.pkl'

# Feature column names, in order.
MODEL_FEATURES_ORDER = [
    'Experience (Years)',
    'Education',
    'Certifications',
    'Job Role',
    'Salary Expectation ($)',
    'Projects Count',
    'C++',
    'Cybersecurity',
    'Deep Learning',
    'Ethical Hacking',
    'Java',
    'Linux',
    'Machine Learning',
    'NLP',
    'Networking',
    'Python',
    'Pytorch',
    'React',
    'SQL',
    'TensorFlow',
]

# --- Global model handle ---
# Starts as None; the 503 guard in the ranking endpoint checks for that value.
model = None
# --- 2. Application lifecycle (lifespan) ---
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load the model from the Hugging Face Hub at startup; log at shutdown.

    Everything before ``yield`` runs once when the application starts,
    everything after it runs once when the application stops. On success the
    module-level ``model`` holds the object returned by ``joblib.load``.
    A failure is logged and swallowed on purpose, so the application still
    starts and the ranking endpoint can answer with HTTP 503.

    Args:
        app: The FastAPI application this lifespan is attached to (unused).
    """
    global model
    print("--- Rozpoczynanie ładowania modelu z Huba... ---")
    try:
        model_path = hf_hub_download(
            repo_id=MODEL_REPO_ID,
            filename=MODEL_FILE_NAME,
        )
        # SECURITY NOTE: joblib.load unpickles the downloaded file, and
        # unpickling can execute arbitrary code. Only point MODEL_REPO_ID at
        # a repository you trust.
        # joblib needs xgboost to be importable here (see the top of the file).
        model = joblib.load(model_path)
        print(f"--- Pomyślnie pobrano i wczytano model z Huba: {MODEL_REPO_ID} ---")

        # Fix up column names: strip leading/trailing whitespace.
        # NOTE(review): if this assignment raises (e.g. the attribute turns
        # out to be read-only on the loaded estimator), the handler below logs
        # a load failure even though `model` is already set.
        if hasattr(model, "feature_names_in_"):
            clean_names = [f.strip() for f in model.feature_names_in_]
            model.feature_names_in_ = clean_names
            print("🧹 Oczyszczone feature_names_in_:", model.feature_names_in_)
    except Exception as e:
        # Deliberately broad: startup must not crash, whatever went wrong.
        print(f"BŁĄD KRYTYCZNY: Nie można wczytać modelu z Huba ({MODEL_REPO_ID}). Błąd: {e}")

    # Always reach the yield so the app comes up and can report HTTP 503.
    yield

    print("--- Zamykanie aplikacji ---")
# --- 3. API definition ---
# The lifespan hook is wired in here so it runs around the app's lifetime.
app = FastAPI(
    lifespan=lifespan,
    title="API Rankingu CV",
    description="API oceniania kandydatów (XGBoost/RandomForest)",
)
# --- 4. Data models (Pydantic) ---
class CandidateFeatures(BaseModel):
    """One candidate: an identifier plus the numeric features fed to the model.

    Fields whose column name is not a valid Python identifier (spaces,
    parentheses, ``+``, ``$``) carry that column name as an alias. With
    ``populate_by_name`` enabled, input may use either the alias or the
    Python field name.
    """

    # Pydantic v2 configuration (this file already uses the v2-only
    # `model_dump`). A plain dict is accepted here, so no extra import is
    # needed; it replaces the deprecated nested `class Config`.
    model_config = {"populate_by_name": True}

    identifier: str = Field(..., description="ID kandydata")
    Experience_Years: float = Field(..., alias="Experience (Years)")
    Education: float
    Certifications: float
    Job_Role: float = Field(..., alias="Job Role")
    Salary_Expectation: float = Field(..., alias="Salary Expectation ($)")
    Projects_Count: float = Field(..., alias="Projects Count")
    Cpp: float = Field(..., alias="C++")
    Cybersecurity: float
    Deep_Learning: float = Field(..., alias="Deep Learning")
    Ethical_Hacking: float = Field(..., alias="Ethical Hacking")
    Java: float
    Linux: float
    Machine_Learning: float = Field(..., alias="Machine Learning")
    NLP: float
    Networking: float
    Python: float
    Pytorch: float
    React: float
    SQL: float
    TensorFlow: float
# Request payload: the list of candidates to be scored.
class RankingRequest(BaseModel):
    candidates: List[CandidateFeatures]
# One result entry: a candidate identifier paired with a float score.
class RankedCandidate(BaseModel):
    identifier: str
    score: float
# Response payload shape: a list of RankedCandidate entries.
class RankingResponse(BaseModel):
    ranked_candidates: List[RankedCandidate]
# --- 5. API endpoints ---
# NOTE(review): the handler had no route registration, so it was never
# reachable. The "/" path is assumed from the handler name; confirm it against
# existing clients.
@app.get("/")
def read_root():
    """Return a static status payload indicating the API is running."""
    return {"status": "OK", "message": "API działa poprawnie"}
# NOTE(review): the handler had no route registration, so it was never
# reachable. The "/rank" path is an assumption; confirm it against existing
# clients before deploying.
@app.post("/rank", response_model=RankingResponse)
def rank_candidates(request: RankingRequest):
    """Score every candidate with the loaded model and return them best-first.

    Args:
        request: Payload holding the candidates to score.

    Returns:
        A dict ``{"ranked_candidates": [...]}`` of ``RankedCandidate`` items
        sorted by descending score; the list is empty for an empty request.

    Raises:
        HTTPException: 503 when the model has not been loaded, 500 when
            building the feature frame or running the prediction fails.
    """
    # Reading the module-level `model` needs no `global` declaration.
    if model is None:
        raise HTTPException(status_code=503, detail="Model nie jest gotowy. Sprawdź logi aplikacji.")
    if not request.candidates:
        return {"ranked_candidates": []}

    try:
        # Dump by alias so the dict keys are the original column names.
        rows = [c.model_dump(by_alias=True) for c in request.candidates]
        identifiers = [row['identifier'] for row in rows]
        features_df = pd.DataFrame(rows).drop(columns=['identifier'])

        # Align columns with the model. Startup treats `feature_names_in_` as
        # optional, so fall back to the configured order when it is absent.
        feature_order = getattr(model, "feature_names_in_", MODEL_FEATURES_ORDER)
        features_df_ordered = features_df.reindex(columns=feature_order, fill_value=0)

        # Column 1 of predict_proba is used as the score.
        # NOTE(review): presumably the positive class of a binary classifier;
        # confirm against the trained model.
        probabilities = model.predict_proba(features_df_ordered)[:, 1]

        ranked = [
            RankedCandidate(identifier=identifier, score=float(probability))
            for identifier, probability in zip(identifiers, probabilities)
        ]
        # Stable sort: candidates with equal scores keep their request order.
        ranked.sort(key=lambda candidate: candidate.score, reverse=True)
        return {"ranked_candidates": ranked}
    except Exception as e:
        print(f"Błąd podczas predykcji: {e}")
        raise HTTPException(status_code=500, detail=f"Błąd serwera: {str(e)}") from e
# Local run: start a uvicorn server when the module is executed directly.
if __name__ == "__main__":
    import uvicorn

    bind_host = "0.0.0.0"
    bind_port = 8000
    uvicorn.run(app, host=bind_host, port=bind_port)