# RandomForest training script: trains a classifier on a remote CSV dataset,
# saves the model + metadata, and deploys a Gradio inference Space.
| import pandas as pd | |
| import numpy as np | |
| from sklearn.ensemble import RandomForestClassifier | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.metrics import accuracy_score, classification_report | |
| import joblib | |
| import json | |
| from huggingface_hub import HfApi | |
| import os | |
def main():
    """Train a RandomForest classifier on a remote CSV dataset, persist the
    model and its metadata to disk, then deploy a Gradio inference Space.

    Reads the dataset URL from ``dataset_config.json``, expects the CSV to
    contain a ``label`` target column, and writes ``model.pkl`` and
    ``metadata.json`` into the working directory.
    """
    print("Starting RandomForest training...")

    # Dataset location is supplied out-of-band via a small config file.
    # (json is already imported at module level; the original re-imported it here.)
    with open("dataset_config.json", "r") as f:
        config = json.load(f)
    file_url = config["file_url"]

    print(f"Downloading dataset from: {file_url}")
    df = pd.read_csv(file_url)
    print(f"Dataset shape: {df.shape}")

    # Fail fast with a clear message instead of a KeyError further down.
    if "label" not in df.columns:
        raise ValueError("Dataset must contain a 'label' column")

    # Separate features and target.
    feature_columns = [col for col in df.columns if col != 'label']
    X = df[feature_columns]
    y = df['label']
    print(f"Features: {feature_columns}")
    print(f"Classes: {y.unique().tolist()}")

    # Stratified split keeps class proportions stable in both partitions.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    rf = RandomForestClassifier(
        n_estimators=100,
        max_depth=None,
        random_state=42,
    )
    print("Training model...")
    rf.fit(X_train, y_train)

    # Evaluate on the held-out split.
    y_pred = rf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy:.4f}")
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))

    # Persist the trained model.
    joblib.dump(rf, "model.pkl")

    # float() guards against numpy scalar types, which json.dump rejects.
    metadata = {
        "job_id": "1a2df142-8854-4cd1-bf73-e9f10c993d15",
        "model_name": "test-model-123",
        "accuracy": float(accuracy),
        "feature_names": feature_columns,
        "n_classes": len(y.unique()),
        "classes": y.unique().tolist(),
    }
    with open("metadata.json", "w") as f:
        json.dump(metadata, f, indent=2)
    print("Training completed successfully!")

    # Deploy the inference Space once training artifacts are on disk.
    deploy_inference_space()
def deploy_inference_space():
    """Create (or reuse) a Hugging Face Space and upload the inference app,
    trained model, and metadata to it.

    Reads the auth token from the ``HF_TOKEN`` environment variable. Upload
    failures are logged and swallowed (best-effort deploy); a missing token
    aborts early instead of crashing on an opaque auth error.
    """
    print("Deploying inference Space...")
    token = os.getenv("HF_TOKEN")
    if not token:
        # whoami() below is outside the try/except and would raise an
        # unhandled auth error, crashing the whole script.
        print("Failed to deploy inference Space: HF_TOKEN is not set")
        return
    api = HfApi(token=token)
    user_info = api.whoami()
    username = user_info["name"]
    inference_space_name = "test-model-123-inference"
    inference_repo_id = f"{username}/{inference_space_name}"
    try:
        # exist_ok=True makes redeploys idempotent: without it, create_repo
        # raises when the Space already exists and the upload never runs.
        api.create_repo(
            repo_id=inference_repo_id,
            repo_type="space",
            space_sdk="gradio",
            exist_ok=True,
        )
        # Upload the generated Gradio/FastAPI app as app.py.
        inference_app = generate_inference_app()
        api.upload_file(
            path_or_fileobj=inference_app.encode(),
            path_in_repo="app.py",
            repo_id=inference_repo_id,
            repo_type="space",
        )
        # Upload the training artifacts the app loads at startup.
        with open("model.pkl", "rb") as f:
            api.upload_file(
                path_or_fileobj=f,
                path_in_repo="model.pkl",
                repo_id=inference_repo_id,
                repo_type="space",
            )
        with open("metadata.json", "rb") as f:
            api.upload_file(
                path_or_fileobj=f,
                path_in_repo="metadata.json",
                repo_id=inference_repo_id,
                repo_type="space",
            )
        print(f"Inference Space deployed: https://huggingface.co/spaces/{inference_repo_id}")
    except Exception as e:
        # Best-effort: deployment failure should not fail the training job.
        print(f"Failed to deploy inference Space: {e}")
def generate_inference_app():
    """Return the full source code (as a string) of the app.py uploaded to
    the inference Space.

    The generated app loads ``model.pkl`` and ``metadata.json`` from the
    Space repo, serves single-row predictions through a Gradio form, and
    batch predictions from a CSV URL through a FastAPI POST endpoint run
    in a background thread.
    """
    # NOTE(review): Spaces typically expose only port 7860 externally; the
    # FastAPI server on port 8000 may be unreachable from outside — confirm.
    return '''
import gradio as gr
import joblib
import json
import pandas as pd
from fastapi import FastAPI
from fastapi.responses import JSONResponse
import uvicorn
import threading

# Load model and metadata
model = joblib.load("model.pkl")
with open("metadata.json", "r") as f:
    metadata = json.load(f)
feature_names = metadata["feature_names"]

def predict(*features):
    """Make prediction with the trained model"""
    # Create input DataFrame
    input_data = pd.DataFrame([list(features)], columns=feature_names)
    # Predict
    prediction = model.predict(input_data)[0]
    probabilities = model.predict_proba(input_data)[0]
    # Format results
    prob_dict = {f"Class {i}": prob for i, prob in enumerate(probabilities)}
    return f"Predicted Class: {prediction}", prob_dict

def predict_batch_from_url(file_url):
    """Make batch predictions from CSV URL"""
    try:
        # Download and process CSV
        df = pd.read_csv(file_url)
        # Check if columns match
        if not all(col in df.columns for col in feature_names):
            return {"error": f"CSV must contain columns: {feature_names}"}
        # Select only the feature columns
        X = df[feature_names]
        # Make predictions
        predictions = model.predict(X)
        probabilities = model.predict_proba(X)
        # Format results
        results = []
        for i, (pred, probs) in enumerate(zip(predictions, probabilities)):
            prob_dict = {f"Class {j}": float(prob) for j, prob in enumerate(probs)}
            results.append({
                "prediction": int(pred),
                "probabilities": prob_dict
            })
        return {"predictions": results}
    except Exception as e:
        return {"error": str(e)}

# FastAPI for batch predictions
app = FastAPI()

@app.post("/api/predict_batch")
async def api_predict_batch(request: dict):
    file_url = request.get("file_url")
    if not file_url:
        return JSONResponse({"error": "file_url is required"}, status_code=400)
    result = predict_batch_from_url(file_url)
    return JSONResponse(result)

# Gradio interface for single predictions
inputs = [gr.Number(label=name) for name in feature_names]
outputs = [
    gr.Textbox(label="Prediction"),
    gr.Label(label="Probabilities")
]
interface = gr.Interface(
    fn=predict,
    inputs=inputs,
    outputs=outputs,
    title=f"{metadata['model_name']} - ML Classifier",
    description=f"Accuracy: {metadata['accuracy']:.4f} | Features: {len(feature_names)}"
)

def run_fastapi():
    uvicorn.run(app, host="0.0.0.0", port=8000)

if __name__ == "__main__":
    # Start FastAPI in background
    fastapi_thread = threading.Thread(target=run_fastapi, daemon=True)
    fastapi_thread.start()
    # Start Gradio
    interface.launch(server_port=7860)
'''
| if __name__ == "__main__": | |
| main() | |