AlexVplle commited on
Commit
a3aaaa2
·
verified ·
1 Parent(s): 83121cf

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +232 -0
app.py ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import pandas as pd
3
+ import numpy as np
4
+ from sklearn.ensemble import RandomForestClassifier
5
+ from sklearn.model_selection import train_test_split
6
+ from sklearn.metrics import accuracy_score, classification_report
7
+ import joblib
8
+ import json
9
+ from huggingface_hub import HfApi
10
+ import os
11
+
12
def main():
    """Train a RandomForest classifier on a remote CSV dataset and deploy it.

    Reads the dataset URL from ``dataset_config.json`` in the working
    directory, trains the model, writes ``model.pkl`` and ``metadata.json``,
    then deploys an inference Space via :func:`deploy_inference_space`.

    Raises:
        FileNotFoundError: if ``dataset_config.json`` is missing.
        KeyError: if the config has no ``file_url`` entry.
    """
    print("Starting RandomForest training...")

    # Load the dataset location from the job's config file.
    # (Removed a redundant `import json` that shadowed the module-level import.)
    with open("dataset_config.json", "r") as f:
        config = json.load(f)

    file_url = config["file_url"]
    print(f"Downloading dataset from: {file_url}")

    df = pd.read_csv(file_url)
    print(f"Dataset shape: {df.shape}")

    # Separate features and target; the target column is assumed to be
    # named 'label' — TODO confirm against the dataset schema.
    feature_columns = [col for col in df.columns if col != 'label']
    X = df[feature_columns]
    y = df['label']

    # Compute the class list once; it is reused for logging and metadata.
    classes = y.unique().tolist()
    print(f"Features: {feature_columns}")
    print(f"Classes: {classes}")

    # Stratified split keeps class proportions equal in both partitions.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    rf = RandomForestClassifier(
        n_estimators=100,
        max_depth=None,
        random_state=42
    )

    print("Training model...")
    rf.fit(X_train, y_train)

    # Evaluate on the held-out split.
    y_pred = rf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    print(f"Accuracy: {accuracy:.4f}")
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))

    # Persist the trained model.
    joblib.dump(rf, "model.pkl")

    # Persist run metadata. `accuracy` is a numpy scalar; cast it to a
    # plain float so json.dump can serialize it without a TypeError.
    metadata = {
        "job_id": "78f00e8c-eadf-435f-a324-a646d34459b7",
        "model_name": "test-model-123",
        "accuracy": float(accuracy),
        "feature_names": feature_columns,
        "n_classes": len(classes),
        "classes": classes,
    }

    with open("metadata.json", "w") as f:
        json.dump(metadata, f, indent=2)

    print("Training completed successfully!")

    # Deploy inference Space
    deploy_inference_space()
78
def deploy_inference_space():
    """Create (or reuse) a Gradio Space and upload the inference artifacts.

    Uploads the generated ``app.py`` plus the ``model.pkl`` and
    ``metadata.json`` files produced by :func:`main`. Authentication uses
    the ``HF_TOKEN`` environment variable. Deployment is best-effort:
    failures are printed and swallowed so training output is preserved.
    """
    print("Deploying inference Space...")

    token = os.getenv("HF_TOKEN")
    api = HfApi(token=token)
    username = api.whoami()["name"]

    inference_space_name = "test-model-123-inference"
    inference_repo_id = f"{username}/{inference_space_name}"

    try:
        # exist_ok=True makes redeploys idempotent instead of failing
        # with "repo already exists" on the second run.
        api.create_repo(
            repo_id=inference_repo_id,
            repo_type="space",
            space_sdk="gradio",
            exist_ok=True,
        )

        # Upload the generated inference app source.
        inference_app = generate_inference_app()
        api.upload_file(
            path_or_fileobj=inference_app.encode(),
            path_in_repo="app.py",
            repo_id=inference_repo_id,
            repo_type="space",
        )

        # Upload model and metadata artifacts produced by main().
        for artifact in ("model.pkl", "metadata.json"):
            with open(artifact, "rb") as f:
                api.upload_file(
                    path_or_fileobj=f,
                    path_in_repo=artifact,
                    repo_id=inference_repo_id,
                    repo_type="space",
                )

        print(f"Inference Space deployed: https://huggingface.co/spaces/{inference_repo_id}")

    except Exception as e:
        # Best-effort: report the failure and continue.
        print(f"Failed to deploy inference Space: {e}")
128
def generate_inference_app():
    """Return the source code of the Gradio + FastAPI inference app as a string.

    Fixes vs. the previous template:
    - probability outputs are labeled with the model's actual classes
      (``model.classes_``) instead of positional indices, so non-contiguous
      or string class labels are reported correctly;
    - batch predictions convert labels with ``.item()`` instead of forcing
      ``int(...)``, which crashed on string labels and returned non-JSON-native
      numpy ints otherwise.
    """
    return '''
import gradio as gr
import joblib
import json
import pandas as pd
from fastapi import FastAPI
from fastapi.responses import JSONResponse
import uvicorn
import threading

# Load model and metadata
model = joblib.load("model.pkl")
with open("metadata.json", "r") as f:
    metadata = json.load(f)

feature_names = metadata["feature_names"]
# The model's true class labels, in predict_proba column order.
class_labels = [str(c) for c in model.classes_]

def predict(*features):
    """Make prediction with the trained model"""

    # Create input DataFrame
    input_data = pd.DataFrame([list(features)], columns=feature_names)

    # Predict
    prediction = model.predict(input_data)[0]
    probabilities = model.predict_proba(input_data)[0]

    # Label probabilities with the model's actual classes
    prob_dict = {f"Class {label}": float(prob)
                 for label, prob in zip(class_labels, probabilities)}

    return f"Predicted Class: {prediction}", prob_dict

def predict_batch_from_url(file_url):
    """Make batch predictions from CSV URL"""
    try:
        # Download and process CSV
        df = pd.read_csv(file_url)

        # Check if columns match
        if not all(col in df.columns for col in feature_names):
            return {"error": f"CSV must contain columns: {feature_names}"}

        # Select only the feature columns
        X = df[feature_names]

        # Make predictions
        predictions = model.predict(X)
        probabilities = model.predict_proba(X)

        # Format results; .item() converts numpy scalars to JSON-native
        # Python values while preserving string labels unchanged.
        results = []
        for pred, probs in zip(predictions, probabilities):
            prob_dict = {f"Class {label}": float(prob)
                         for label, prob in zip(class_labels, probs)}
            results.append({
                "prediction": pred.item() if hasattr(pred, "item") else pred,
                "probabilities": prob_dict
            })

        return {"predictions": results}

    except Exception as e:
        return {"error": str(e)}

# FastAPI for batch predictions
app = FastAPI()

@app.post("/api/predict_batch")
async def api_predict_batch(request: dict):
    file_url = request.get("file_url")
    if not file_url:
        return JSONResponse({"error": "file_url is required"}, status_code=400)

    result = predict_batch_from_url(file_url)
    return JSONResponse(result)

# Gradio interface for single predictions
inputs = [gr.Number(label=name) for name in feature_names]
outputs = [
    gr.Textbox(label="Prediction"),
    gr.Label(label="Probabilities")
]

interface = gr.Interface(
    fn=predict,
    inputs=inputs,
    outputs=outputs,
    title=f"{metadata['model_name']} - ML Classifier",
    description=f"Accuracy: {metadata['accuracy']:.4f} | Features: {len(feature_names)}"
)

def run_fastapi():
    uvicorn.run(app, host="0.0.0.0", port=8000)

if __name__ == "__main__":
    # Start FastAPI in background
    fastapi_thread = threading.Thread(target=run_fastapi, daemon=True)
    fastapi_thread.start()

    # Start Gradio
    interface.launch(server_port=7860)
'''
230
+
231
+ if __name__ == "__main__":
232
+ main()