Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,8 +3,10 @@ import pandas as pd
|
|
| 3 |
from fastapi import FastAPI, HTTPException
|
| 4 |
from pydantic import BaseModel, Field
|
| 5 |
from typing import List
|
|
|
|
| 6 |
|
| 7 |
MODEL_FILE_NAME = 'model_raport.pkl'
|
|
|
|
| 8 |
|
| 9 |
MODEL_FEATURES_ORDER = [
|
| 10 |
'Experience (Years)', 'Education', 'Certifications', 'Job Role',
|
|
@@ -14,17 +16,19 @@ MODEL_FEATURES_ORDER = [
|
|
| 14 |
'React', 'SQL', 'TensorFlow'
|
| 15 |
]
|
| 16 |
|
|
|
|
| 17 |
model = None
|
| 18 |
|
|
|
|
| 19 |
app = FastAPI(
|
| 20 |
title="API Rankingu CV",
|
| 21 |
description="API, które przyjmuje listę kandydatów, ocenia ich za pomocą modelu RandomForest i zwraca ranking."
|
| 22 |
)
|
| 23 |
|
| 24 |
-
class CandidateFeatures(BaseModel):
|
| 25 |
|
|
|
|
|
|
|
| 26 |
identifier: str = Field(..., description="Unikalny identyfikator kandydata, np. email lub ID.")
|
| 27 |
-
|
| 28 |
Experience_Years: float = Field(..., alias="Experience (Years)")
|
| 29 |
Education: float
|
| 30 |
Certifications: float
|
|
@@ -50,33 +54,35 @@ class CandidateFeatures(BaseModel):
|
|
| 50 |
populate_by_name = True
|
| 51 |
|
| 52 |
class RankingRequest(BaseModel):
|
| 53 |
-
"""Definiuje format zapytania - oczekujemy listy kandydatów."""
|
| 54 |
candidates: List[CandidateFeatures]
|
| 55 |
|
| 56 |
class RankedCandidate(BaseModel):
|
| 57 |
-
"""Definiuje format odpowiedzi dla jednego kandydata."""
|
| 58 |
identifier: str
|
| 59 |
score: float = Field(..., description="Prawdopodobieństwo zaproszenia (0.0 do 1.0)")
|
| 60 |
|
| 61 |
class RankingResponse(BaseModel):
|
| 62 |
-
"""Definiuje format odpowiedzi - zwracamy listę ocenionych kandydatów."""
|
| 63 |
ranked_candidates: List[RankedCandidate]
|
| 64 |
|
| 65 |
|
| 66 |
-
# --- 2. Ładowanie modelu ---
|
| 67 |
@app.on_event("startup")
|
| 68 |
-
def
|
| 69 |
-
"""
|
|
|
|
|
|
|
|
|
|
| 70 |
global model
|
| 71 |
try:
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
except Exception as e:
|
| 77 |
-
print(f"BŁĄD KRYTYCZNY: Nie można wczytać modelu z
|
| 78 |
|
| 79 |
-
# --- 3. Punkty końcowe API (Endpoints) ---
|
| 80 |
|
| 81 |
@app.get("/")
|
| 82 |
def read_root():
|
|
@@ -87,35 +93,22 @@ def read_root():
|
|
| 87 |
def rank_candidates(request: RankingRequest):
|
| 88 |
global model
|
| 89 |
if model is None:
|
| 90 |
-
|
|
|
|
| 91 |
|
| 92 |
if not request.candidates:
|
| 93 |
return {"ranked_candidates": []}
|
| 94 |
|
| 95 |
try:
|
| 96 |
-
# 1. Konwertuj listę kandydatów (z Pydantic) na listę słowników
|
| 97 |
-
# Używamy .model_dump(by_alias=True), aby uzyskać nazwy z aliasów (np. "C++")
|
| 98 |
candidate_data_list = [c.model_dump(by_alias=True) for c in request.candidates]
|
| 99 |
-
|
| 100 |
-
# Przechowujemy identyfikatory do późniejszego użycia
|
| 101 |
identifiers = [c['identifier'] for c in candidate_data_list]
|
| 102 |
|
| 103 |
-
# 2. Stwórz DataFrame z poprawną kolejnością kolumn
|
| 104 |
-
# To jest absolutnie krytyczne dla modelu scikit-learn!
|
| 105 |
df = pd.DataFrame(candidate_data_list)
|
| 106 |
-
|
| 107 |
-
# Upewnij się, że brakuje tylko kolumny 'identifier', a reszta pasuje
|
| 108 |
features_df = df.drop(columns=['identifier'])
|
| 109 |
-
|
| 110 |
-
# Ustaw kolejność kolumn DOKŁADNIE tak, jak w treningu
|
| 111 |
features_df_ordered = features_df[MODEL_FEATURES_ORDER]
|
| 112 |
|
| 113 |
-
# 3. Predykcja
|
| 114 |
-
# Używamy predict_proba(), aby dostać prawdopodobieństwo, a nie tylko 0 lub 1
|
| 115 |
-
# [:, 1] oznacza, że bierzemy prawdopodobieństwo dla klasy '1' (Zaproszony)
|
| 116 |
probabilities = model.predict_proba(features_df_ordered)[:, 1]
|
| 117 |
|
| 118 |
-
# 4. Tworzenie odpowiedzi
|
| 119 |
ranked_list = []
|
| 120 |
for i, identifier in enumerate(identifiers):
|
| 121 |
ranked_list.append(RankedCandidate(
|
|
@@ -123,21 +116,18 @@ def rank_candidates(request: RankingRequest):
|
|
| 123 |
score=probabilities[i]
|
| 124 |
))
|
| 125 |
|
| 126 |
-
# 5. Sortowanie
|
| 127 |
-
# Sortuj listę kandydatów malejąco (descending) po wyniku (score)
|
| 128 |
sorted_ranked_list = sorted(ranked_list, key=lambda x: x.score, reverse=True)
|
| 129 |
|
| 130 |
return {"ranked_candidates": sorted_ranked_list}
|
| 131 |
|
| 132 |
except KeyError as e:
|
| 133 |
-
# Ten błąd wystąpi, jeśli w danych wejściowych brakuje jakiejś cechy
|
| 134 |
raise HTTPException(status_code=400, detail=f"Brakująca lub błędna cecha (KeyError): {e}")
|
| 135 |
except Exception as e:
|
| 136 |
-
# Ogólny błąd serwera
|
| 137 |
raise HTTPException(status_code=500, detail=f"Wystąpił wewnętrzny błąd serwera: {str(e)}")
|
| 138 |
|
| 139 |
# Uruchomienie aplikacji (dla testów lokalnych)
|
| 140 |
-
# Hugging Face Spaces użyje własnego serwera (uvicorn), ale to jest przydatne
|
| 141 |
if __name__ == "__main__":
|
| 142 |
import uvicorn
|
|
|
|
|
|
|
| 143 |
uvicorn.run(app, host="0.0.0.0", port=8000)
|
|
|
|
| 3 |
from fastapi import FastAPI, HTTPException
|
| 4 |
from pydantic import BaseModel, Field
|
| 5 |
from typing import List
|
| 6 |
+
from huggingface_hub import hf_hub_download
|
| 7 |
|
| 8 |
MODEL_FILE_NAME = 'model_raport.pkl'
|
| 9 |
+
MODEL_REPO_ID = 'zotthytt12/model_hr'
|
| 10 |
|
| 11 |
MODEL_FEATURES_ORDER = [
|
| 12 |
'Experience (Years)', 'Education', 'Certifications', 'Job Role',
|
|
|
|
| 16 |
'React', 'SQL', 'TensorFlow'
|
| 17 |
]
|
| 18 |
|
| 19 |
+
|
| 20 |
model = None
|
| 21 |
|
| 22 |
+
|
| 23 |
app = FastAPI(
|
| 24 |
title="API Rankingu CV",
|
| 25 |
description="API, które przyjmuje listę kandydatów, ocenia ich za pomocą modelu RandomForest i zwraca ranking."
|
| 26 |
)
|
| 27 |
|
|
|
|
| 28 |
|
| 29 |
+
class CandidateFeatures(BaseModel):
|
| 30 |
+
"""Definiuje cechy JEDNEGO kandydata."""
|
| 31 |
identifier: str = Field(..., description="Unikalny identyfikator kandydata, np. email lub ID.")
|
|
|
|
| 32 |
Experience_Years: float = Field(..., alias="Experience (Years)")
|
| 33 |
Education: float
|
| 34 |
Certifications: float
|
|
|
|
| 54 |
populate_by_name = True
|
| 55 |
|
| 56 |
class RankingRequest(BaseModel):
|
|
|
|
| 57 |
candidates: List[CandidateFeatures]
|
| 58 |
|
| 59 |
class RankedCandidate(BaseModel):
|
|
|
|
| 60 |
identifier: str
|
| 61 |
score: float = Field(..., description="Prawdopodobieństwo zaproszenia (0.0 do 1.0)")
|
| 62 |
|
| 63 |
class RankingResponse(BaseModel):
|
|
|
|
| 64 |
ranked_candidates: List[RankedCandidate]
|
| 65 |
|
| 66 |
|
|
|
|
| 67 |
@app.on_event("startup")
|
| 68 |
+
def load_model_from_hub():
|
| 69 |
+
"""
|
| 70 |
+
Pobiera model .pkl z Hugging Face Hub i wczytuje go
|
| 71 |
+
przy starcie aplikacji.
|
| 72 |
+
"""
|
| 73 |
global model
|
| 74 |
try:
|
| 75 |
+
model_path = hf_hub_download(
|
| 76 |
+
repo_id=MODEL_REPO_ID,
|
| 77 |
+
filename=MODEL_FILE_NAME
|
| 78 |
+
)
|
| 79 |
+
|
| 80 |
+
model = joblib.load(model_path)
|
| 81 |
+
print(f"--- Pomyślnie pobrano i wczytano model z Huba: {MODEL_REPO_ID} ---")
|
| 82 |
+
|
| 83 |
except Exception as e:
|
| 84 |
+
print(f"BŁĄD KRYTYCZNY: Nie można wczytać modelu z Huba ({MODEL_REPO_ID}). Błąd: {e}")
|
| 85 |
|
|
|
|
| 86 |
|
| 87 |
@app.get("/")
|
| 88 |
def read_root():
|
|
|
|
| 93 |
def rank_candidates(request: RankingRequest):
|
| 94 |
global model
|
| 95 |
if model is None:
|
| 96 |
+
# Jeśli model się nie załadował przy starcie, zwróć błąd
|
| 97 |
+
raise HTTPException(status_code=503, detail="Model nie jest jeszcze gotowy. Sprawdź logi serwera.")
|
| 98 |
|
| 99 |
if not request.candidates:
|
| 100 |
return {"ranked_candidates": []}
|
| 101 |
|
| 102 |
try:
|
|
|
|
|
|
|
| 103 |
candidate_data_list = [c.model_dump(by_alias=True) for c in request.candidates]
|
|
|
|
|
|
|
| 104 |
identifiers = [c['identifier'] for c in candidate_data_list]
|
| 105 |
|
|
|
|
|
|
|
| 106 |
df = pd.DataFrame(candidate_data_list)
|
|
|
|
|
|
|
| 107 |
features_df = df.drop(columns=['identifier'])
|
|
|
|
|
|
|
| 108 |
features_df_ordered = features_df[MODEL_FEATURES_ORDER]
|
| 109 |
|
|
|
|
|
|
|
|
|
|
| 110 |
probabilities = model.predict_proba(features_df_ordered)[:, 1]
|
| 111 |
|
|
|
|
| 112 |
ranked_list = []
|
| 113 |
for i, identifier in enumerate(identifiers):
|
| 114 |
ranked_list.append(RankedCandidate(
|
|
|
|
| 116 |
score=probabilities[i]
|
| 117 |
))
|
| 118 |
|
|
|
|
|
|
|
| 119 |
sorted_ranked_list = sorted(ranked_list, key=lambda x: x.score, reverse=True)
|
| 120 |
|
| 121 |
return {"ranked_candidates": sorted_ranked_list}
|
| 122 |
|
| 123 |
except KeyError as e:
|
|
|
|
| 124 |
raise HTTPException(status_code=400, detail=f"Brakująca lub błędna cecha (KeyError): {e}")
|
| 125 |
except Exception as e:
|
|
|
|
| 126 |
raise HTTPException(status_code=500, detail=f"Wystąpił wewnętrzny błąd serwera: {str(e)}")
|
| 127 |
|
| 128 |
# Uruchomienie aplikacji (dla testów lokalnych)
|
|
|
|
| 129 |
if __name__ == "__main__":
|
| 130 |
import uvicorn
|
| 131 |
+
# Uruchom ręcznie ładowanie modelu dla testów lokalnych
|
| 132 |
+
load_model_from_hub()
|
| 133 |
uvicorn.run(app, host="0.0.0.0", port=8000)
|