Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List

import joblib
import pandas as pd
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, ConfigDict, Field
|
| 6 |
+
|
| 7 |
+
# Path of the serialized model that is loaded at application startup.
MODEL_FILE_NAME = 'model_raport.pkl'

# Feature columns in the exact order in which they are handed to the model.
# Kept as a list (not a tuple): it is used directly as a pandas column
# selector, and pandas treats a tuple as a single key.
MODEL_FEATURES_ORDER = [
    'Experience (Years)', 'Education', 'Certifications', 'Job Role',
    'Salary Expectation ($)', 'Projects Count', 'C++', 'Cybersecurity',
    'Deep Learning', 'Ethical Hacking', 'Java', 'Linux',
    'Machine Learning', 'NLP', 'Networking', 'Python', 'Pytorch',
    'React', 'SQL', 'TensorFlow',
]

# Module-level model handle; stays None until a model has been loaded.
model = None
|
| 18 |
+
|
| 19 |
+
# FastAPI application object; title and description are user-facing (Polish)
# and appear in the generated API documentation.
app = FastAPI(
    title="API Rankingu CV",
    description="API, które przyjmuje listę kandydatów, ocenia ich za pomocą modelu RandomForest i zwraca ranking.",
)
|
| 23 |
+
|
| 24 |
+
class CandidateFeatures(BaseModel):
    # Input schema for one candidate: an identifier plus the numeric feature
    # values consumed by the model.
    #
    # Feature names that are not valid Python identifiers ("C++",
    # "Job Role", ...) are exposed through aliases. Documented with comments
    # rather than a docstring because Pydantic publishes a model docstring as
    # the schema description.

    # Pydantic v2 configuration (replaces the deprecated inner `class Config`):
    # accept both the alias and the Python field name on input.
    model_config = ConfigDict(populate_by_name=True)

    identifier: str = Field(..., description="Unikalny identyfikator kandydata, np. email lub ID.")

    Experience_Years: float = Field(..., alias="Experience (Years)")
    Education: float
    Certifications: float
    Job_Role: float = Field(..., alias="Job Role")
    Salary_Expectation: float = Field(..., alias="Salary Expectation ($)")
    Projects_Count: float = Field(..., alias="Projects Count")
    Cpp: float = Field(..., alias="C++")
    Cybersecurity: float
    Deep_Learning: float = Field(..., alias="Deep Learning")
    Ethical_Hacking: float = Field(..., alias="Ethical Hacking")
    Java: float
    Linux: float
    Machine_Learning: float = Field(..., alias="Machine Learning")
    NLP: float
    Networking: float
    Python: float
    Pytorch: float
    React: float
    SQL: float
    TensorFlow: float
|
| 51 |
+
|
| 52 |
+
# Request body of the ranking endpoint: a list of candidates to score.
# The docstring stays in Polish on purpose: Pydantic publishes it as the
# schema description, so it is user-facing text.
class RankingRequest(BaseModel):
    """Definiuje format zapytania - oczekujemy listy kandydatów."""

    candidates: List[CandidateFeatures]
|
| 55 |
+
|
| 56 |
+
# One entry of the ranking response: the candidate identifier and its score.
# Docstring and field description stay in Polish on purpose: both are
# published in the generated schema, so they are user-facing text.
class RankedCandidate(BaseModel):
    """Definiuje format odpowiedzi dla jednego kandydata."""

    identifier: str
    score: float = Field(..., description="Prawdopodobieństwo zaproszenia (0.0 do 1.0)")
|
| 60 |
+
|
| 61 |
+
# Response body of the ranking endpoint: the scored candidates.
# The docstring stays in Polish on purpose: Pydantic publishes it as the
# schema description, so it is user-facing text.
class RankingResponse(BaseModel):
    """Definiuje format odpowiedzi - zwracamy listę ocenionych kandydatów."""

    ranked_candidates: List[RankedCandidate]
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
# --- 2. Model loading ---
|
| 67 |
+
# NOTE(review): FastAPI documents `on_event` as deprecated in favour of
# lifespan handlers; kept here because switching requires changing how `app`
# is constructed.
@app.on_event("startup")
def load_model():
    """Load the model from MODEL_FILE_NAME into the module-level ``model``.

    Registered as a startup handler. Loading is best-effort: any failure is
    reported on stdout and ``model`` is left untouched, so the application
    still starts even when the model file is missing or unreadable.
    """
    global model
    try:
        # NOTE(security): joblib.load unpickles the file, and unpickling can
        # execute arbitrary code. Only ship model files from a trusted source.
        model = joblib.load(MODEL_FILE_NAME)
    except FileNotFoundError:
        print(f"BŁĄD KRYTYCZNY: Nie znaleziono pliku modelu: {MODEL_FILE_NAME}")
    except Exception as e:
        print(f"BŁĄD KRYTYCZNY: Nie można wczytać modelu z pliku {MODEL_FILE_NAME}. Błąd: {e}")
    else:
        print(f"--- Pomyślnie wczytano model z pliku: {MODEL_FILE_NAME} ---")
|
| 78 |
+
|
| 79 |
+
# --- 3. API endpoints ---
|
| 80 |
+
|
| 81 |
+
@app.get("/")
def read_root():
    # Root endpoint: answers with a fixed status/greeting payload.
    # Documented with a comment rather than a docstring because FastAPI
    # publishes endpoint docstrings in the generated API documentation.
    greeting = {"status": "OK", "message": "Witaj w API do Rankingu CV!"}
    return greeting
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
@app.post("/rank", response_model=RankingResponse)
def rank_candidates(request: RankingRequest):
    # Score every candidate in the request and return them ordered by score,
    # highest first.
    #
    # Responses:
    #   503 - no model has been loaded (module-level `model` is None)
    #   400 - a feature column required by MODEL_FEATURES_ORDER is missing
    #   500 - any other failure while building the frame or predicting
    #
    # Documented with comments rather than a docstring because FastAPI
    # publishes endpoint docstrings in the generated API documentation.
    # `model` is only read here, so no `global` declaration is needed.
    if model is None:
        raise HTTPException(status_code=503, detail="Model nie jest jeszcze gotowy. Spróbuj ponownie za chwilę.")

    if not request.candidates:
        return {"ranked_candidates": []}

    try:
        # Dump with by_alias=True so the keys are the feature names listed in
        # MODEL_FEATURES_ORDER (e.g. "C++") rather than the Python field names.
        candidate_rows = [c.model_dump(by_alias=True) for c in request.candidates]
        identifiers = [row['identifier'] for row in candidate_rows]

        # Selecting by MODEL_FEATURES_ORDER both drops the 'identifier' column
        # and fixes the column order handed to the model.
        features = pd.DataFrame(candidate_rows)[MODEL_FEATURES_ORDER]

        # Column 1 of predict_proba is taken as the probability of the
        # positive ("invited") class - assumes a binary classifier whose
        # second class is the positive one; TODO confirm against training.
        probabilities = model.predict_proba(features)[:, 1]

        # float() turns the numpy scalar into a plain Python float.
        ranked = [
            RankedCandidate(identifier=identifier, score=float(probability))
            for identifier, probability in zip(identifiers, probabilities)
        ]
        # list.sort is stable: candidates with equal scores keep request order.
        ranked.sort(key=lambda candidate: candidate.score, reverse=True)

        return {"ranked_candidates": ranked}

    except KeyError as e:
        # Raised when an expected feature column is absent from the data.
        raise HTTPException(status_code=400, detail=f"Brakująca lub błędna cecha (KeyError): {e}") from e
    except Exception as e:
        # Generic server-side failure.
        raise HTTPException(status_code=500, detail=f"Wystąpił wewnętrzny błąd serwera: {str(e)}") from e
|
| 138 |
+
|
| 139 |
+
# Start the application directly (for local testing).
# Hugging Face Spaces will use its own server (uvicorn), but this is handy.
if __name__ == "__main__":
    import uvicorn

    server_options = {"host": "0.0.0.0", "port": 8000}
    uvicorn.run(app, **server_options)
|