Spaces:

shelfgot
/

talmud-language-classifier

Sleeping

App Files Files Community

shelfgot committed on Nov 11, 2025

Commit

172b660

verified ·

1 Parent(s): 6441bee

no predictions, save model

Browse files

Files changed (1) hide show

app.py +114 -25

app.py CHANGED Viewed

@@ -13,9 +13,11 @@ import threading
 import logging
 from typing import Optional
 from pydantic import BaseModel
-from train import train_model
-from predict import generate_all_predictions
 # Configure logging
 logging.basicConfig(level=logging.INFO)
@@ -47,10 +49,15 @@ class TrainingRequest(BaseModel):
     callback_auth_token: str
     timestamp: Optional[str] = None
 def run_training_async(training_data: str, callback_url: str, callback_auth_token: str):
     """
     Run training in a separate thread to avoid blocking the request.
-    This function runs the full training and prediction pipeline.
     """
     global training_in_progress, training_result, training_error
@@ -63,32 +70,15 @@ def run_training_async(training_data: str, callback_url: str, callback_auth_toke
         # Train the model
         result = train_model(training_data)
-        model = result['model']
-        word_to_idx = result['word_to_idx']
-        label_encoder = result['label_encoder']
         stats = result['stats']
         logger.info(f"Training completed. Accuracy: {stats['accuracy']:.4f}")
-        # Get Vercel base URL from environment
-        vercel_base_url = os.getenv('VERCEL_BASE_URL')
-        if not vercel_base_url:
-            raise ValueError("VERCEL_BASE_URL environment variable not set")
-        logger.info("Generating predictions for all dafim...")
-        # Generate predictions for all dafim
-        # Use the callback_auth_token to authenticate requests to Vercel endpoints
-        predictions = generate_all_predictions(
-            model, word_to_idx, label_encoder, vercel_base_url, callback_auth_token
-        )
-        logger.info(f"Generated {len(predictions)} predictions")
-        # Prepare callback payload
         callback_payload = {
             'stats': stats,
-            'predictions': predictions,
             'auth_token': callback_auth_token
         }
@@ -97,7 +87,7 @@ def run_training_async(training_data: str, callback_url: str, callback_auth_toke
         response = requests.post(
             callback_url,
             json=callback_payload,
-            timeout=300,  # 5 minute timeout
             headers={'Content-Type': 'application/json'}
         )
@@ -201,8 +191,107 @@ async def health_check():
     """Health check endpoint"""
     return {"status": "healthy"}
 if __name__ == "__main__":
     import uvicorn
     port = int(os.getenv("PORT", 7860))
     uvicorn.run(app, host="0.0.0.0", port=port)

 import logging
 from typing import Optional
 from pydantic import BaseModel
+import torch
+import pickle
+from train import train_model, TalmudClassifierLSTM, MAX_LEN, EMBEDDING_DIM, HIDDEN_DIM
+from predict import generate_predictions_for_daf
 # Configure logging
 logging.basicConfig(level=logging.INFO)
     callback_auth_token: str
     timestamp: Optional[str] = None
+class PredictionRequest(BaseModel):
+    # Request body accepted by the POST /predict endpoint.
+    # Text of the daf to classify; /predict rejects empty or whitespace-only values with 400.
+    daf_text: str
+    # Caller-supplied secret; /predict compares it against the TRAINING_CALLBACK_TOKEN env var.
+    auth_token: str
 def run_training_async(training_data: str, callback_url: str, callback_auth_token: str):
     """
     Run training in a separate thread to avoid blocking the request.
+    Trains the model on the provided training data and returns test results
+    on the ground truth (test set). Does not generate predictions for all dafim.
     """
     global training_in_progress, training_result, training_error
         # Train the model
         result = train_model(training_data)
         stats = result['stats']
         logger.info(f"Training completed. Accuracy: {stats['accuracy']:.4f}")
+        logger.info(f"Test set results - Accuracy: {stats['accuracy']:.4f}, Loss: {stats['loss']:.4f}")
+        logger.info(f"F1 Scores: {stats['f1_scores']}")
+        # Prepare callback payload with only stats (test results on ground truth)
         callback_payload = {
             'stats': stats,
             'auth_token': callback_auth_token
         }
         response = requests.post(
             callback_url,
             json=callback_payload,
+            timeout=60,  # Reduced timeout since we're not generating predictions
             headers={'Content-Type': 'application/json'}
         )
     """Health check endpoint"""
     return {"status": "healthy"}
+def load_model_artifacts():
+    """
+    Load model artifacts from /tmp directory.
+    Returns (model, word_to_idx, label_encoder) or (None, None, None) if not found.
+    """
+    model_path = '/tmp/latest_model.pt'
+    word_to_idx_path = '/tmp/word_to_idx.pt'
+    label_encoder_path = '/tmp/label_encoder.pkl'
+    try:
+        # Check if all files exist
+        if not os.path.exists(model_path) or not os.path.exists(word_to_idx_path) or not os.path.exists(label_encoder_path):
+            return None, None, None
+        # Load word_to_idx
+        word_to_idx = torch.load(word_to_idx_path)
+        # Load label_encoder
+        with open(label_encoder_path, 'rb') as f:
+            label_encoder = pickle.load(f)
+        # Determine number of classes from label_encoder
+        num_classes = len(label_encoder.classes_)
+        # Create model and load state dict
+        # Explicitly load on CPU (HF Spaces typically use CPU)
+        model = TalmudClassifierLSTM(len(word_to_idx), EMBEDDING_DIM, HIDDEN_DIM, num_classes)
+        model.load_state_dict(torch.load(model_path, map_location='cpu'))
+        model.eval()
+        # Ensure model is on CPU
+        model = model.cpu()
+        logger.info("Successfully loaded model artifacts from /tmp")
+        return model, word_to_idx, label_encoder
+    except Exception as e:
+        logger.error(f"Error loading model artifacts: {e}", exc_info=True)
+        return None, None, None
+@app.post("/predict")
+async def predict_endpoint(request: PredictionRequest):
+    """
+    On-demand prediction endpoint.
+    Accepts daf text and generates predictions using the latest trained model.
+    Authentication: Requires TRAINING_CALLBACK_TOKEN to be set in environment variables.
+    The token must match the auth_token sent in the request body.
+    """
+    # Verify authentication token
+    # Security: Always require authentication token to match TRAINING_CALLBACK_TOKEN
+    expected_token = os.getenv('TRAINING_CALLBACK_TOKEN')
+    if not expected_token:
+        logger.error("TRAINING_CALLBACK_TOKEN not set in environment - prediction endpoint is insecure!")
+        raise HTTPException(
+            status_code=500,
+            detail="Server configuration error: TRAINING_CALLBACK_TOKEN not configured"
+        )
+    if not request.auth_token or request.auth_token != expected_token:
+        raise HTTPException(
+            status_code=401,
+            detail="Unauthorized: Invalid authentication token"
+        )
+    if not request.daf_text or not request.daf_text.strip():
+        raise HTTPException(
+            status_code=400,
+            detail="Missing or empty daf_text"
+        )
+    # Load model artifacts
+    model, word_to_idx, label_encoder = load_model_artifacts()
+    if model is None or word_to_idx is None or label_encoder is None:
+        raise HTTPException(
+            status_code=404,
+            detail="Model not found. Please train a model first by triggering training from your Vercel app."
+        )
+    try:
+        # Generate predictions
+        logger.info("Generating predictions for daf text...")
+        ranges = generate_predictions_for_daf(
+            model, request.daf_text, word_to_idx, label_encoder
+        )
+        logger.info(f"Generated {len(ranges)} prediction ranges")
+        return {
+            "success": True,
+            "ranges": ranges
+        }
+    except Exception as e:
+        logger.error(f"Error generating predictions: {e}", exc_info=True)
+        raise HTTPException(
+            status_code=500,
+            detail=f"Error generating predictions: {str(e)}"
+        )
 if __name__ == "__main__":
     import uvicorn
     port = int(os.getenv("PORT", 7860))
     uvicorn.run(app, host="0.0.0.0", port=port)