Spaces:

zotthytt12
/

hr_classifier

Sleeping

App Files Community

zotthytt12 committed on Nov 4, 2025

Commit

573e1e6

verified ·

1 Parent(s): 1e76509

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -10

app.py CHANGED Viewed

@@ -5,7 +5,9 @@ from pydantic import BaseModel, Field
 from typing import List
 from huggingface_hub import hf_hub_download
 MODEL_FILE_NAME = 'model_raport.pkl'
 MODEL_REPO_ID = 'zotthytt12/model_hr'
 MODEL_FEATURES_ORDER = [
@@ -16,15 +18,16 @@ MODEL_FEATURES_ORDER = [
     'React', 'SQL', 'TensorFlow'
 ]
 model = None
 app = FastAPI(
     title="API Rankingu CV",
     description="API, które przyjmuje listę kandydatów, ocenia ich za pomocą modelu RandomForest i zwraca ranking."
 )
 class CandidateFeatures(BaseModel):
     """Definiuje cechy JEDNEGO kandydata."""
@@ -54,23 +57,28 @@ class CandidateFeatures(BaseModel):
         populate_by_name = True
 class RankingRequest(BaseModel):
     candidates: List[CandidateFeatures]
 class RankedCandidate(BaseModel):
     identifier: str
     score: float = Field(..., description="Prawdopodobieństwo zaproszenia (0.0 do 1.0)")
 class RankingResponse(BaseModel):
     ranked_candidates: List[RankedCandidate]
-@app.on_event("startup")
-def load_model_from_hub():
-    """
-    Pobiera model .pkl z Hugging Face Hub i wczytuje go
-    przy starcie aplikacji.
-    """
     global model
     try:
         model_path = hf_hub_download(
             repo_id=MODEL_REPO_ID,
@@ -82,15 +90,29 @@ def load_model_from_hub():
     except Exception as e:
         print(f"BŁĄD KRYTYCZNY: Nie można wczytać modelu z Huba ({MODEL_REPO_ID}). Błąd: {e}")
 @app.get("/")
 def read_root():
     return {"status": "OK", "message": "Witaj w API do Rankingu CV!"}
 @app.post("/rank", response_model=RankingResponse)
 def rank_candidates(request: RankingRequest):
     global model
     if model is None:
         # Jeśli model się nie załadował przy starcie, zwróć błąd
@@ -100,15 +122,23 @@ def rank_candidates(request: RankingRequest):
         return {"ranked_candidates": []}
     try:
         candidate_data_list = [c.model_dump(by_alias=True) for c in request.candidates]
         identifiers = [c['identifier'] for c in candidate_data_list]
         df = pd.DataFrame(candidate_data_list)
         features_df = df.drop(columns=['identifier'])
         features_df_ordered = features_df[MODEL_FEATURES_ORDER]
         probabilities = model.predict_proba(features_df_ordered)[:, 1]
         ranked_list = []
         for i, identifier in enumerate(identifiers):
             ranked_list.append(RankedCandidate(
@@ -116,6 +146,7 @@ def rank_candidates(request: RankingRequest):
                 score=probabilities[i]
             ))
         sorted_ranked_list = sorted(ranked_list, key=lambda x: x.score, reverse=True)
         return {"ranked_candidates": sorted_ranked_list}
@@ -128,6 +159,8 @@ def rank_candidates(request: RankingRequest):
 # Uruchomienie aplikacji (dla testów lokalnych)
 if __name__ == "__main__":
     import uvicorn
-    # Uruchom ręcznie ładowanie modelu dla testów lokalnych
-    load_model_from_hub()
     uvicorn.run(app, host="0.0.0.0", port=8000)

 from typing import List
 from huggingface_hub import hf_hub_download
+# --- Sekcja Konfiguracji Modelu ---
 MODEL_FILE_NAME = 'model_raport.pkl'
+# Upewnij się, że ta nazwa repozytorium jest poprawna!
 MODEL_REPO_ID = 'zotthytt12/model_hr'
 MODEL_FEATURES_ORDER = [
     'React', 'SQL', 'TensorFlow'
 ]
+# --- Globalna zmienna na model ---
 model = None
+# --- Definicja API (FastAPI) ---
 app = FastAPI(
     title="API Rankingu CV",
     description="API, które przyjmuje listę kandydatów, ocenia ich za pomocą modelu RandomForest i zwraca ranking."
 )
+# --- 1. Modele danych (Pydantic) ---
 class CandidateFeatures(BaseModel):
     """Definiuje cechy JEDNEGO kandydata."""
         populate_by_name = True
 class RankingRequest(BaseModel):
+    """Definiuje format zapytania - oczekujemy listy kandydatów."""
     candidates: List[CandidateFeatures]
 class RankedCandidate(BaseModel):
+    """Definiuje format odpowiedzi dla jednego kandydata."""
     identifier: str
     score: float = Field(..., description="Prawdopodobieństwo zaproszenia (0.0 do 1.0)")
 class RankingResponse(BaseModel):
+    """Definiuje format odpowiedzi - zwracamy listę ocenionych kandydatów."""
     ranked_candidates: List[RankedCandidate]
+# --- 2. Ładowanie modelu ---
+# (Używamy nowszego 'lifespan' zamiast 'on_event')
+from contextlib import asynccontextmanager
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    # Kod uruchamiany przy starcie
     global model
+    print("--- Rozpoczynanie ładowania modelu z Huba... ---")
     try:
         model_path = hf_hub_download(
             repo_id=MODEL_REPO_ID,
     except Exception as e:
         print(f"BŁĄD KRYTYCZNY: Nie można wczytać modelu z Huba ({MODEL_REPO_ID}). Błąd: {e}")
+    yield
+    # Kod uruchamiany przy zamknięciu (jeśli potrzebny)
+    print("--- Zamykanie aplikacji ---")
+# Przypisz funkcję lifespan do aplikacji
+app.router.lifespan_context = lifespan
+# --- 3. Punkty końcowe API (Endpoints) ---
 @app.get("/")
 def read_root():
+    """Podstawowy endpoint (główna strona) do sprawdzania, czy API działa."""
     return {"status": "OK", "message": "Witaj w API do Rankingu CV!"}
 @app.post("/rank", response_model=RankingResponse)
 def rank_candidates(request: RankingRequest):
+    """
+    Ten endpoint przyjmuje listę kandydatów, przetwarza ich dane,
+    przepuszcza przez model i zwraca posortowany ranking.
+    """
     global model
     if model is None:
         # Jeśli model się nie załadował przy starcie, zwróć błąd
         return {"ranked_candidates": []}
     try:
+        # 1. Konwertuj listę kandydatów
         candidate_data_list = [c.model_dump(by_alias=True) for c in request.candidates]
         identifiers = [c['identifier'] for c in candidate_data_list]
+        # 2. Stwórz DataFrame
         df = pd.DataFrame(candidate_data_list)
+        # Upewnij się, że brakuje tylko kolumny 'identifier', a reszta pasuje
         features_df = df.drop(columns=['identifier'])
+        # Ustaw kolejność kolumn DOKŁADNIE tak, jak w treningu
         features_df_ordered = features_df[MODEL_FEATURES_ORDER]
+        # 3. Predykcja
         probabilities = model.predict_proba(features_df_ordered)[:, 1]
+        # 4. Tworzenie odpowiedzi
         ranked_list = []
         for i, identifier in enumerate(identifiers):
             ranked_list.append(RankedCandidate(
                 score=probabilities[i]
             ))
+        # 5. Sortowanie
         sorted_ranked_list = sorted(ranked_list, key=lambda x: x.score, reverse=True)
         return {"ranked_candidates": sorted_ranked_list}
 # Uruchomienie aplikacji (dla testów lokalnych)
 if __name__ == "__main__":
     import uvicorn
+    # Uwaga: przy starcie z __main__ lifespan nie zadziała automatycznie
+    # Trzeba by go wywołać ręcznie lub po prostu polegać na teście z uvicorn
+    print("Uruchamianie lokalne - model zostanie załadowany przez 'lifespan' po starcie uvicorn.")
     uvicorn.run(app, host="0.0.0.0", port=8000)