AlexVplle's picture
Upload app.py with huggingface_hub
6670a76 verified
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import joblib
import json
from huggingface_hub import HfApi
import os
def main():
    """Train a RandomForest classifier, persist it, and deploy an inference Space.

    Steps, in order:

    1. Read ``dataset_config.json`` from the working directory and take the
       CSV location from its ``"file_url"`` key.
    2. Load the CSV with pandas; the ``label`` column is the target and every
       other column is used as a feature.
    3. Split 80/20 (stratified on the label, ``random_state=42``) and fit a
       100-tree ``RandomForestClassifier``.
    4. Print the accuracy and the classification report for the held-out split.
    5. Write the fitted model to ``model.pkl`` and a description of the run to
       ``metadata.json``.
    6. Call ``deploy_inference_space()`` to publish the artifacts.

    Raises:
        Whatever the file, pandas, or scikit-learn calls raise (for example a
        missing config file or a CSV without a ``label`` column); none of
        those errors are caught here.
    """
    print("Starting RandomForest training...")

    # Load dataset from URL (``json`` is already imported at module level).
    with open("dataset_config.json", "r") as f:
        config = json.load(f)
    file_url = config["file_url"]
    print(f"Downloading dataset from: {file_url}")
    df = pd.read_csv(file_url)
    print(f"Dataset shape: {df.shape}")

    # Separate features and target
    feature_columns = [col for col in df.columns if col != 'label']
    X = df[feature_columns]
    y = df['label']
    # Compute the distinct labels once; they are reused for logging and metadata.
    classes = y.unique().tolist()
    print(f"Features: {feature_columns}")
    print(f"Classes: {classes}")

    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    # Train RandomForest
    rf = RandomForestClassifier(
        n_estimators=100,
        max_depth=None,
        random_state=42
    )
    print("Training model...")
    rf.fit(X_train, y_train)

    # Evaluate
    y_pred = rf.predict(X_test)
    # Store a built-in float so the metadata file holds a plain JSON number.
    accuracy = float(accuracy_score(y_test, y_pred))
    print(f"Accuracy: {accuracy:.4f}")
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))

    # Save model
    joblib.dump(rf, "model.pkl")

    # Save metadata
    metadata = {
        "job_id": "1a2df142-8854-4cd1-bf73-e9f10c993d15",
        "model_name": "test-model-123",
        "accuracy": accuracy,
        "feature_names": feature_columns,
        "n_classes": len(classes),
        "classes": classes
    }
    with open("metadata.json", "w") as f:
        json.dump(metadata, f, indent=2)
    print("Training completed successfully!")

    # Deploy inference Space
    deploy_inference_space()
def deploy_inference_space():
    """Create (or reuse) the Gradio inference Space and upload its files.

    The Hugging Face token is read from the ``HF_TOKEN`` environment variable
    and passed to ``HfApi``; the account name returned by ``whoami()`` is used
    as the Space owner. The Space receives three files: the generated
    ``app.py``, plus ``model.pkl`` and ``metadata.json`` read from the working
    directory.

    Failures while creating the Space or uploading are reported on stdout and
    not re-raised, so a deployment problem does not abort the caller. Errors
    from ``whoami()`` itself are not caught.
    """
    print("Deploying inference Space...")
    token = os.getenv("HF_TOKEN")
    api = HfApi(token=token)
    user_info = api.whoami()
    username = user_info["name"]
    inference_space_name = "test-model-123-inference"
    inference_repo_id = f"{username}/{inference_space_name}"
    try:
        # Create inference Space. ``exist_ok=True`` lets a re-run refresh an
        # already-deployed Space instead of failing before any file is uploaded.
        api.create_repo(
            repo_id=inference_repo_id,
            repo_type="space",
            space_sdk="gradio",
            exist_ok=True
        )

        # Upload inference app
        inference_app = generate_inference_app()
        api.upload_file(
            path_or_fileobj=inference_app.encode(),
            path_in_repo="app.py",
            repo_id=inference_repo_id,
            repo_type="space"
        )

        # Upload model and metadata
        for artifact in ("model.pkl", "metadata.json"):
            with open(artifact, "rb") as f:
                api.upload_file(
                    path_or_fileobj=f,
                    path_in_repo=artifact,
                    repo_id=inference_repo_id,
                    repo_type="space"
                )
        print(f"Inference Space deployed: https://huggingface.co/spaces/{inference_repo_id}")
    except Exception as e:
        # Deliberate best-effort: training output is already saved locally.
        print(f"Failed to deploy inference Space: {e}")
def generate_inference_app():
    """Return the source code of the inference Space's ``app.py`` as a string.

    The generated program loads ``model.pkl`` and ``metadata.json`` from its
    working directory and exposes:

    * a Gradio interface with one numeric input per feature, returning the
      predicted class and per-class probabilities;
    * a FastAPI ``POST /api/predict_batch`` endpoint that takes a JSON body
      with a ``file_url`` key, reads that CSV, and returns predictions for
      every row.

    Probability entries are keyed ``"Class <label>"`` using the model's own
    ``classes_`` so the labels stay correct when classes are not 0..n-1, and
    predictions are converted to built-in Python values so non-integer labels
    serialize to JSON.

    NOTE(review): the generated app serves FastAPI on port 8000 from a
    background thread while Gradio listens on 7860 — confirm that the hosting
    environment actually exposes port 8000, otherwise the batch endpoint is
    unreachable from outside.
    NOTE(review): the batch endpoint fetches whatever URL the caller supplies;
    confirm that is acceptable for the deployment.

    Returns:
        str: Python source text, beginning with a newline.
    """
    return '''
import gradio as gr
import joblib
import json
import pandas as pd
from fastapi import FastAPI
from fastapi.responses import JSONResponse
import uvicorn
import threading

# Load model and metadata
model = joblib.load("model.pkl")
with open("metadata.json", "r") as f:
    metadata = json.load(f)
feature_names = metadata["feature_names"]


def _to_native(value):
    """Convert a NumPy scalar to the equivalent built-in Python value"""
    return value.item() if hasattr(value, "item") else value


def predict(*features):
    """Make prediction with the trained model"""
    # Create input DataFrame
    input_data = pd.DataFrame([list(features)], columns=feature_names)
    # Predict
    prediction = model.predict(input_data)[0]
    probabilities = model.predict_proba(input_data)[0]
    # Format results (predict_proba columns follow model.classes_)
    prob_dict = {
        f"Class {label}": float(prob)
        for label, prob in zip(model.classes_, probabilities)
    }
    return f"Predicted Class: {prediction}", prob_dict


def predict_batch_from_url(file_url):
    """Make batch predictions from CSV URL"""
    try:
        # Download and process CSV
        df = pd.read_csv(file_url)
        # Check if columns match
        if not all(col in df.columns for col in feature_names):
            return {"error": f"CSV must contain columns: {feature_names}"}
        # Select only the feature columns
        X = df[feature_names]
        # Make predictions
        predictions = model.predict(X)
        probabilities = model.predict_proba(X)
        # Format results
        results = []
        for pred, probs in zip(predictions, probabilities):
            prob_dict = {
                f"Class {label}": float(prob)
                for label, prob in zip(model.classes_, probs)
            }
            results.append({
                "prediction": _to_native(pred),
                "probabilities": prob_dict
            })
        return {"predictions": results}
    except Exception as e:
        return {"error": str(e)}


# FastAPI for batch predictions
app = FastAPI()


@app.post("/api/predict_batch")
async def api_predict_batch(request: dict):
    file_url = request.get("file_url")
    if not file_url:
        return JSONResponse({"error": "file_url is required"}, status_code=400)
    result = predict_batch_from_url(file_url)
    return JSONResponse(result)


# Gradio interface for single predictions
inputs = [gr.Number(label=name) for name in feature_names]
outputs = [
    gr.Textbox(label="Prediction"),
    gr.Label(label="Probabilities")
]
interface = gr.Interface(
    fn=predict,
    inputs=inputs,
    outputs=outputs,
    title=f"{metadata['model_name']} - ML Classifier",
    description=f"Accuracy: {metadata['accuracy']:.4f} | Features: {len(feature_names)}"
)


def run_fastapi():
    uvicorn.run(app, host="0.0.0.0", port=8000)


if __name__ == "__main__":
    # Start FastAPI in background
    fastapi_thread = threading.Thread(target=run_fastapi, daemon=True)
    fastapi_thread.start()
    # Start Gradio
    interface.launch(server_port=7860)
'''
# Run the training and deployment pipeline only when this file is executed
# as a script, not when it is imported.
if __name__ == "__main__":
    main()