Spaces:
Runtime error
Runtime error
File size: 6,313 Bytes
6670a76 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 |
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import joblib
import json
from huggingface_hub import HfApi
import os
def main():
    """Train a RandomForest classifier on a CSV dataset and deploy it.

    Steps:
      1. Read ``file_url`` from ``dataset_config.json`` and load that CSV.
      2. Use the ``label`` column as the target and every other column as
         a feature.
      3. Fit a ``RandomForestClassifier`` on a stratified 80/20 split and
         print accuracy plus a classification report for the held-out part.
      4. Write ``model.pkl`` and ``metadata.json`` to the working directory.
      5. Call ``deploy_inference_space()``.

    Raises:
        KeyError: if the config has no ``file_url`` entry or the dataset
            has no ``label`` column.
    """
    print("Starting RandomForest training...")

    # Load dataset from URL
    with open("dataset_config.json", "r", encoding="utf-8") as f:
        config = json.load(f)
    file_url = config["file_url"]
    print(f"Downloading dataset from: {file_url}")
    df = pd.read_csv(file_url)
    print(f"Dataset shape: {df.shape}")

    # Fail early with an actionable message instead of a bare KeyError
    # raised from deep inside pandas.
    if 'label' not in df.columns:
        raise KeyError("Dataset must contain a 'label' column")

    # Separate features and target
    feature_columns = [col for col in df.columns if col != 'label']
    X = df[feature_columns]
    y = df['label']
    # Computed once: reused for logging and for the metadata file.
    classes = y.unique().tolist()
    print(f"Features: {feature_columns}")
    print(f"Classes: {classes}")

    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    # Train RandomForest
    rf = RandomForestClassifier(
        n_estimators=100,
        max_depth=None,
        random_state=42,
    )
    print("Training model...")
    rf.fit(X_train, y_train)

    # Evaluate
    y_pred = rf.predict(X_test)
    # Plain float so json.dump never sees a NumPy scalar.
    accuracy = float(accuracy_score(y_test, y_pred))
    print(f"Accuracy: {accuracy:.4f}")
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))

    # Save model
    joblib.dump(rf, "model.pkl")

    # Save metadata
    metadata = {
        "job_id": "1a2df142-8854-4cd1-bf73-e9f10c993d15",
        "model_name": "test-model-123",
        "accuracy": accuracy,
        "feature_names": feature_columns,
        "n_classes": len(classes),
        "classes": classes,
    }
    with open("metadata.json", "w", encoding="utf-8") as f:
        json.dump(metadata, f, indent=2)
    print("Training completed successfully!")

    # Deploy inference Space
    deploy_inference_space()
def deploy_inference_space():
    """Create (or reuse) the inference Space and upload everything it needs.

    The Space is named ``test-model-123-inference`` under the account that
    ``HfApi.whoami()`` reports for the token in the ``HF_TOKEN`` environment
    variable. Uploaded files:

      * ``requirements.txt`` - packages the generated app needs at runtime
      * ``app.py``           - source returned by ``generate_inference_app()``
      * ``model.pkl``        - read from the working directory
      * ``metadata.json``    - read from the working directory

    Any exception raised while creating the Space or uploading is caught and
    reported on stdout, so a deployment failure does not abort the caller.
    The ``whoami()`` lookup happens before that guard and can still raise.
    """
    # Local import: only the version string is needed, to pin the runtime.
    import sklearn

    print("Deploying inference Space...")
    token = os.getenv("HF_TOKEN")
    api = HfApi(token=token)
    user_info = api.whoami()
    username = user_info["name"]
    inference_space_name = "test-model-123-inference"
    inference_repo_id = f"{username}/{inference_space_name}"

    # The generated app unpickles a scikit-learn model with joblib. Pin
    # scikit-learn to the training version, since pickled estimators are
    # not guaranteed to load under a different release.
    requirements = f"scikit-learn=={sklearn.__version__}\njoblib\npandas\n"

    try:
        # Create inference Space. exist_ok=True lets the job be re-run
        # against a Space created by an earlier run instead of failing.
        api.create_repo(
            repo_id=inference_repo_id,
            repo_type="space",
            space_sdk="gradio",
            exist_ok=True,
        )

        # Upload generated files (dependencies first, then the app).
        generated_files = {
            "requirements.txt": requirements,
            "app.py": generate_inference_app(),
        }
        for path_in_repo, content in generated_files.items():
            api.upload_file(
                path_or_fileobj=content.encode(),
                path_in_repo=path_in_repo,
                repo_id=inference_repo_id,
                repo_type="space",
            )

        # Upload model and metadata
        for filename in ("model.pkl", "metadata.json"):
            with open(filename, "rb") as f:
                api.upload_file(
                    path_or_fileobj=f,
                    path_in_repo=filename,
                    repo_id=inference_repo_id,
                    repo_type="space",
                )

        print(f"Inference Space deployed: https://huggingface.co/spaces/{inference_repo_id}")
    except Exception as e:
        # Best-effort: training artifacts are already on disk, so report
        # the failure rather than crash.
        print(f"Failed to deploy inference Space: {e}")
def generate_inference_app():
    """Return the Python source of the inference Space's ``app.py``.

    The generated app loads ``model.pkl`` and ``metadata.json`` from its
    working directory and exposes:

      * a Gradio interface with one numeric input per feature, returning
        the predicted class and per-class probabilities;
      * a FastAPI ``POST /api/predict_batch`` endpoint that takes
        ``{"file_url": ...}``, downloads that CSV and returns a prediction
        and probabilities for every row.

    Probabilities are keyed ``"Class <label>"`` using ``model.classes_``,
    which is the column order of ``predict_proba``.

    NOTE(review): the FastAPI server listens on port 8000 in a background
    thread while Gradio uses 7860 - confirm that port 8000 is actually
    reachable in the hosting environment.

    Returns:
        str: the complete, syntactically valid source code of ``app.py``.
    """
    return '''
import gradio as gr
import joblib
import json
import pandas as pd
from fastapi import FastAPI
from fastapi.responses import JSONResponse
import uvicorn
import threading

# Load model and metadata
model = joblib.load("model.pkl")
with open("metadata.json", "r") as f:
    metadata = json.load(f)
feature_names = metadata["feature_names"]

# predict_proba columns follow model.classes_, so label them by class value
# rather than by column position.
class_labels = [f"Class {cls}" for cls in model.classes_]


def to_json_label(pred):
    """Convert a predicted label to a JSON-serializable value"""
    try:
        return int(pred)
    except (TypeError, ValueError):
        # Non-numeric labels (e.g. strings) are returned as text
        return str(pred)


def predict(*features):
    """Make prediction with the trained model"""
    # Create input DataFrame
    input_data = pd.DataFrame([list(features)], columns=feature_names)
    # Predict
    prediction = model.predict(input_data)[0]
    probabilities = model.predict_proba(input_data)[0]
    # Format results
    prob_dict = {
        label: float(prob) for label, prob in zip(class_labels, probabilities)
    }
    return f"Predicted Class: {prediction}", prob_dict


def predict_batch_from_url(file_url):
    """Make batch predictions from CSV URL"""
    # file_url comes from an untrusted request; pd.read_csv would also read
    # local paths, so only accept http(s) URLs.
    if not isinstance(file_url, str) or not file_url.startswith(
        ("http://", "https://")
    ):
        return {"error": "file_url must be an http(s) URL"}
    try:
        # Download and process CSV
        df = pd.read_csv(file_url)
        # Check if columns match
        if not all(col in df.columns for col in feature_names):
            return {"error": f"CSV must contain columns: {feature_names}"}
        # Select only the feature columns
        X = df[feature_names]
        # Make predictions
        predictions = model.predict(X)
        probabilities = model.predict_proba(X)
        # Format results
        results = [
            {
                "prediction": to_json_label(pred),
                "probabilities": {
                    label: float(prob)
                    for label, prob in zip(class_labels, probs)
                },
            }
            for pred, probs in zip(predictions, probabilities)
        ]
        return {"predictions": results}
    except Exception as e:
        return {"error": str(e)}


# FastAPI for batch predictions
app = FastAPI()


@app.post("/api/predict_batch")
async def api_predict_batch(request: dict):
    file_url = request.get("file_url")
    if not file_url:
        return JSONResponse({"error": "file_url is required"}, status_code=400)
    result = predict_batch_from_url(file_url)
    return JSONResponse(result)


# Gradio interface for single predictions
inputs = [gr.Number(label=name) for name in feature_names]
outputs = [
    gr.Textbox(label="Prediction"),
    gr.Label(label="Probabilities")
]
interface = gr.Interface(
    fn=predict,
    inputs=inputs,
    outputs=outputs,
    title=f"{metadata['model_name']} - ML Classifier",
    description=f"Accuracy: {metadata['accuracy']:.4f} | Features: {len(feature_names)}"
)


def run_fastapi():
    uvicorn.run(app, host="0.0.0.0", port=8000)


if __name__ == "__main__":
    # Start FastAPI in background
    fastapi_thread = threading.Thread(target=run_fastapi, daemon=True)
    fastapi_thread.start()
    # Start Gradio
    interface.launch(server_port=7860)
'''
if __name__ == "__main__":
    # Script entry point: run the full train-and-deploy pipeline only when
    # this file is executed directly, not when it is imported.
    main()
|