AlexVplle commited on
Commit
00e077c
·
verified ·
1 Parent(s): e2c9373

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +248 -0
app.py ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import pandas as pd
3
+ import numpy as np
4
+ from sklearn.ensemble import RandomForestClassifier
5
+ from sklearn.model_selection import train_test_split
6
+ from sklearn.metrics import accuracy_score, classification_report
7
+ import joblib
8
+ import json
9
+ from huggingface_hub import HfApi
10
+ import os
11
+
12
def main():
    """Train a RandomForest classifier on a remote CSV dataset and deploy it.

    Reads the dataset URL from ``dataset_config.json``, trains a
    RandomForestClassifier on an 80/20 stratified split, writes
    ``model.pkl`` and ``metadata.json`` to the working directory, and
    finally deploys an inference Space via ``deploy_inference_space()``.
    """
    print("Starting RandomForest training...")

    # Load the dataset URL from the local config file.
    # (Fix: removed a redundant function-level `import json`; json is
    # already imported at module level.)
    with open("dataset_config.json", "r") as f:
        config = json.load(f)

    file_url = config["file_url"]
    print(f"Downloading dataset from: {file_url}")

    df = pd.read_csv(file_url)
    print(f"Dataset shape: {df.shape}")

    # Separate features and target: the target column is expected to be
    # named 'label'; every other column is treated as a feature.
    feature_columns = [col for col in df.columns if col != 'label']
    X = df[feature_columns]
    y = df['label']

    print(f"Features: {feature_columns}")
    print(f"Classes: {y.unique().tolist()}")

    # Stratified split keeps class proportions equal in train and test.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    # Fixed random_state makes the training run reproducible.
    rf = RandomForestClassifier(
        n_estimators=100,
        max_depth=None,
        random_state=42
    )

    print("Training model...")
    rf.fit(X_train, y_train)

    # Evaluate on the held-out split.
    y_pred = rf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    print(f"Accuracy: {accuracy:.4f}")
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))

    # Persist the trained model for the inference Space to load.
    joblib.dump(rf, "model.pkl")

    # Save metadata consumed by the generated inference app.
    # (Fix: coerce accuracy to a plain float — accuracy_score can return
    # a NumPy scalar, which json.dump cannot serialize.)
    metadata = {
        "job_id": "c395ce79-f111-4914-82f4-936a06b107de",
        "model_name": "test-model-123",
        "accuracy": float(accuracy),
        "feature_names": feature_columns,
        "n_classes": len(y.unique()),
        "classes": y.unique().tolist()
    }

    with open("metadata.json", "w") as f:
        json.dump(metadata, f, indent=2)

    print("Training completed successfully!")

    # Deploy inference Space
    deploy_inference_space()
78
def deploy_inference_space():
    """Create (or reuse) a Gradio Space and upload the inference bundle.

    Uploads the generated ``app.py`` and ``requirements.txt`` plus the
    ``model.pkl`` and ``metadata.json`` files produced by ``main()``.
    Any failure is logged and swallowed so that a deployment error does
    not fail the training job itself.
    """
    print("Deploying inference Space...")

    # Authenticate with the token injected into the environment.
    token = os.getenv("HF_TOKEN")
    api = HfApi(token=token)
    user_info = api.whoami()
    username = user_info["name"]

    inference_space_name = "test-model-123-inference-c395ce79"
    inference_repo_id = f"{username}/{inference_space_name}"

    try:
        # Create inference Space.
        # (Fix: exist_ok=True so a re-run does not abort when the Space
        # already exists — previously create_repo would raise and the
        # whole deployment was skipped.)
        api.create_repo(
            repo_id=inference_repo_id,
            repo_type="space",
            space_sdk="gradio",
            exist_ok=True
        )

        # Upload inference app
        inference_app = generate_inference_app()
        api.upload_file(
            path_or_fileobj=inference_app.encode(),
            path_in_repo="app.py",
            repo_id=inference_repo_id,
            repo_type="space"
        )

        # Upload requirements for inference space
        inference_requirements = generate_inference_requirements()
        api.upload_file(
            path_or_fileobj=inference_requirements.encode(),
            path_in_repo="requirements.txt",
            repo_id=inference_repo_id,
            repo_type="space"
        )

        # Upload model and metadata produced by main().
        with open("model.pkl", "rb") as f:
            api.upload_file(
                path_or_fileobj=f,
                path_in_repo="model.pkl",
                repo_id=inference_repo_id,
                repo_type="space"
            )

        with open("metadata.json", "rb") as f:
            api.upload_file(
                path_or_fileobj=f,
                path_in_repo="metadata.json",
                repo_id=inference_repo_id,
                repo_type="space"
            )

        print(f"Inference Space deployed: https://huggingface.co/spaces/{inference_repo_id}")

    except Exception as e:
        # Best-effort deploy: log and continue rather than crash training.
        print(f"Failed to deploy inference Space: {e}")
137
def generate_inference_requirements():
    """Return the ``requirements.txt`` content for the inference Space.

    (Fix: added ``scikit-learn`` — ``model.pkl`` is a pickled
    RandomForestClassifier, and ``joblib.load`` needs scikit-learn
    importable at unpickling time; without it the deployed Space crashes
    on startup.)
    """
    return '''gradio
joblib
pandas
scikit-learn
fastapi
uvicorn'''
143
+
144
def generate_inference_app():
    """Return the source code (as a string) of the inference Space app.

    The generated app loads ``model.pkl`` and ``metadata.json`` from the
    Space repo, serves a Gradio form for single predictions on port 7860,
    and runs a FastAPI server on port 8000 in a daemon thread exposing a
    ``/api/predict_batch`` endpoint that scores a CSV fetched from a URL.
    The returned string is written verbatim as the Space's ``app.py``.
    """
    return '''
import gradio as gr
import joblib
import json
import pandas as pd
from fastapi import FastAPI
from fastapi.responses import JSONResponse
import uvicorn
import threading

# Load model and metadata
model = joblib.load("model.pkl")
with open("metadata.json", "r") as f:
    metadata = json.load(f)

feature_names = metadata["feature_names"]

def predict(*features):
    """Make prediction with the trained model"""

    # Create input DataFrame
    input_data = pd.DataFrame([list(features)], columns=feature_names)

    # Predict
    prediction = model.predict(input_data)[0]
    probabilities = model.predict_proba(input_data)[0]

    # Format results
    prob_dict = {f"Class {i}": prob for i, prob in enumerate(probabilities)}

    return f"Predicted Class: {prediction}", prob_dict

def predict_batch_from_url(file_url):
    """Make batch predictions from CSV URL"""
    try:
        # Download and process CSV
        df = pd.read_csv(file_url)

        # Check if columns match
        if not all(col in df.columns for col in feature_names):
            return {"error": f"CSV must contain columns: {feature_names}"}

        # Select only the feature columns
        X = df[feature_names]

        # Make predictions
        predictions = model.predict(X)
        probabilities = model.predict_proba(X)

        # Format results
        results = []
        for i, (pred, probs) in enumerate(zip(predictions, probabilities)):
            prob_dict = {f"Class {j}": float(prob) for j, prob in enumerate(probs)}
            results.append({
                "prediction": int(pred),
                "probabilities": prob_dict
            })

        return {"predictions": results}

    except Exception as e:
        return {"error": str(e)}

# FastAPI for batch predictions
app = FastAPI()

@app.post("/api/predict_batch")
async def api_predict_batch(request: dict):
    file_url = request.get("file_url")
    if not file_url:
        return JSONResponse({"error": "file_url is required"}, status_code=400)

    result = predict_batch_from_url(file_url)
    return JSONResponse(result)

# Gradio interface for single predictions
inputs = [gr.Number(label=name) for name in feature_names]
outputs = [
    gr.Textbox(label="Prediction"),
    gr.Label(label="Probabilities")
]

interface = gr.Interface(
    fn=predict,
    inputs=inputs,
    outputs=outputs,
    title=f"{metadata['model_name']} - ML Classifier",
    description=f"Accuracy: {metadata['accuracy']:.4f} | Features: {len(feature_names)}"
)

def run_fastapi():
    uvicorn.run(app, host="0.0.0.0", port=8000)

if __name__ == "__main__":
    # Start FastAPI in background
    fastapi_thread = threading.Thread(target=run_fastapi, daemon=True)
    fastapi_thread.start()

    # Start Gradio
    interface.launch(server_port=7860)
'''
246
+
247
# Script entry point: run training (and Space deployment) when executed directly.
if __name__ == "__main__":
    main()