AlexVplle commited on
Commit
00e077c
·
verified ·
1 Parent(s): e2c9373

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +248 -0
app.py ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import pandas as pd
3
+ import numpy as np
4
+ from sklearn.ensemble import RandomForestClassifier
5
+ from sklearn.model_selection import train_test_split
6
+ from sklearn.metrics import accuracy_score, classification_report
7
+ import joblib
8
+ import json
9
+ from huggingface_hub import HfApi
10
+ import os
11
+
12
def main():
    """Train a RandomForest classifier on a remote CSV dataset and deploy it.

    Reads the dataset URL from ``dataset_config.json``, trains a
    RandomForestClassifier on an 80/20 stratified split, writes
    ``model.pkl`` and ``metadata.json`` to the working directory, and
    finally deploys an inference Space via ``deploy_inference_space()``.
    """
    print("Starting RandomForest training...")

    # Load the dataset URL from the local config file.
    # (Fix: removed a redundant function-level `import json`; json is
    # already imported at module level.)
    with open("dataset_config.json", "r") as f:
        config = json.load(f)

    file_url = config["file_url"]
    print(f"Downloading dataset from: {file_url}")

    df = pd.read_csv(file_url)
    print(f"Dataset shape: {df.shape}")

    # Separate features and target: the target column is expected to be
    # named 'label'; every other column is treated as a feature.
    feature_columns = [col for col in df.columns if col != 'label']
    X = df[feature_columns]
    y = df['label']

    print(f"Features: {feature_columns}")
    print(f"Classes: {y.unique().tolist()}")

    # Stratified split keeps class proportions equal in train and test.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    # Fixed random_state makes the training run reproducible.
    rf = RandomForestClassifier(
        n_estimators=100,
        max_depth=None,
        random_state=42
    )

    print("Training model...")
    rf.fit(X_train, y_train)

    # Evaluate on the held-out split.
    y_pred = rf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    print(f"Accuracy: {accuracy:.4f}")
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))

    # Persist the trained model for the inference Space to load.
    joblib.dump(rf, "model.pkl")

    # Save metadata consumed by the generated inference app.
    # (Fix: coerce accuracy to a plain float — accuracy_score can return
    # a NumPy scalar, which json.dump cannot serialize.)
    metadata = {
        "job_id": "c395ce79-f111-4914-82f4-936a06b107de",
        "model_name": "test-model-123",
        "accuracy": float(accuracy),
        "feature_names": feature_columns,
        "n_classes": len(y.unique()),
        "classes": y.unique().tolist()
    }

    with open("metadata.json", "w") as f:
        json.dump(metadata, f, indent=2)

    print("Training completed successfully!")

    # Deploy inference Space
    deploy_inference_space()
78
def deploy_inference_space():
    """Create (or reuse) a Gradio Space and upload the inference bundle.

    Uploads the generated ``app.py`` and ``requirements.txt`` plus the
    ``model.pkl`` and ``metadata.json`` files produced by ``main()``.
    Any failure is logged and swallowed so that a deployment error does
    not fail the training job itself.
    """
    print("Deploying inference Space...")

    # Authenticate with the token injected into the environment.
    token = os.getenv("HF_TOKEN")
    api = HfApi(token=token)
    user_info = api.whoami()
    username = user_info["name"]

    inference_space_name = "test-model-123-inference-c395ce79"
    inference_repo_id = f"{username}/{inference_space_name}"

    try:
        # Create inference Space.
        # (Fix: exist_ok=True so a re-run does not abort when the Space
        # already exists — previously create_repo would raise and the
        # whole deployment was skipped.)
        api.create_repo(
            repo_id=inference_repo_id,
            repo_type="space",
            space_sdk="gradio",
            exist_ok=True
        )

        # Upload inference app
        inference_app = generate_inference_app()
        api.upload_file(
            path_or_fileobj=inference_app.encode(),
            path_in_repo="app.py",
            repo_id=inference_repo_id,
            repo_type="space"
        )

        # Upload requirements for inference space
        inference_requirements = generate_inference_requirements()
        api.upload_file(
            path_or_fileobj=inference_requirements.encode(),
            path_in_repo="requirements.txt",
            repo_id=inference_repo_id,
            repo_type="space"
        )

        # Upload model and metadata produced by main().
        with open("model.pkl", "rb") as f:
            api.upload_file(
                path_or_fileobj=f,
                path_in_repo="model.pkl",
                repo_id=inference_repo_id,
                repo_type="space"
            )

        with open("metadata.json", "rb") as f:
            api.upload_file(
                path_or_fileobj=f,
                path_in_repo="metadata.json",
                repo_id=inference_repo_id,
                repo_type="space"
            )

        print(f"Inference Space deployed: https://huggingface.co/spaces/{inference_repo_id}")

    except Exception as e:
        # Best-effort deploy: log and continue rather than crash training.
        print(f"Failed to deploy inference Space: {e}")
137
def generate_inference_requirements():
    """Return the ``requirements.txt`` content for the inference Space.

    (Fix: added ``scikit-learn`` — ``model.pkl`` is a pickled
    RandomForestClassifier, and ``joblib.load`` needs scikit-learn
    importable at unpickling time; without it the deployed Space crashes
    on startup.)
    """
    return '''gradio
joblib
pandas
scikit-learn
fastapi
uvicorn'''
143
+
144
def generate_inference_app():
    """Return the source code (as a string) of the inference Space app.

    The generated app loads ``model.pkl`` and ``metadata.json`` from the
    Space repo, serves a Gradio form for single predictions on port 7860,
    and runs a FastAPI server on port 8000 in a daemon thread exposing a
    ``/api/predict_batch`` endpoint that scores a CSV fetched from a URL.
    The returned string is written verbatim as the Space's ``app.py``.
    """
    return '''
import gradio as gr
import joblib
import json
import pandas as pd
from fastapi import FastAPI
from fastapi.responses import JSONResponse
import uvicorn
import threading

# Load model and metadata
model = joblib.load("model.pkl")
with open("metadata.json", "r") as f:
    metadata = json.load(f)

feature_names = metadata["feature_names"]

def predict(*features):
    """Make prediction with the trained model"""

    # Create input DataFrame
    input_data = pd.DataFrame([list(features)], columns=feature_names)

    # Predict
    prediction = model.predict(input_data)[0]
    probabilities = model.predict_proba(input_data)[0]

    # Format results
    prob_dict = {f"Class {i}": prob for i, prob in enumerate(probabilities)}

    return f"Predicted Class: {prediction}", prob_dict

def predict_batch_from_url(file_url):
    """Make batch predictions from CSV URL"""
    try:
        # Download and process CSV
        df = pd.read_csv(file_url)

        # Check if columns match
        if not all(col in df.columns for col in feature_names):
            return {"error": f"CSV must contain columns: {feature_names}"}

        # Select only the feature columns
        X = df[feature_names]

        # Make predictions
        predictions = model.predict(X)
        probabilities = model.predict_proba(X)

        # Format results
        results = []
        for i, (pred, probs) in enumerate(zip(predictions, probabilities)):
            prob_dict = {f"Class {j}": float(prob) for j, prob in enumerate(probs)}
            results.append({
                "prediction": int(pred),
                "probabilities": prob_dict
            })

        return {"predictions": results}

    except Exception as e:
        return {"error": str(e)}

# FastAPI for batch predictions
app = FastAPI()

@app.post("/api/predict_batch")
async def api_predict_batch(request: dict):
    file_url = request.get("file_url")
    if not file_url:
        return JSONResponse({"error": "file_url is required"}, status_code=400)

    result = predict_batch_from_url(file_url)
    return JSONResponse(result)

# Gradio interface for single predictions
inputs = [gr.Number(label=name) for name in feature_names]
outputs = [
    gr.Textbox(label="Prediction"),
    gr.Label(label="Probabilities")
]

interface = gr.Interface(
    fn=predict,
    inputs=inputs,
    outputs=outputs,
    title=f"{metadata['model_name']} - ML Classifier",
    description=f"Accuracy: {metadata['accuracy']:.4f} | Features: {len(feature_names)}"
)

def run_fastapi():
    uvicorn.run(app, host="0.0.0.0", port=8000)

if __name__ == "__main__":
    # Start FastAPI in background
    fastapi_thread = threading.Thread(target=run_fastapi, daemon=True)
    fastapi_thread.start()

    # Start Gradio
    interface.launch(server_port=7860)
'''
246
+
247
# Script entry point: run training (and Space deployment) when executed directly.
if __name__ == "__main__":
    main()