Pandrive786 committed on
Commit
2294841
·
verified ·
1 Parent(s): 320198b

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +746 -0
app.py ADDED
@@ -0,0 +1,746 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ import pandas as pd
3
+ import numpy as np
4
+ import io, base64, json
5
+ import matplotlib
6
+ matplotlib.use('Agg')
7
+ import matplotlib.pyplot as plt
8
+ from fastapi import FastAPI, Request
9
+ from fastapi.responses import JSONResponse, HTMLResponse
10
+
# ASGI application object; the route decorators in this module register
# their endpoints on it.
app = FastAPI()
12
+
def decode_image(img_b64):
    """Decode a base64-encoded image into an RGB PIL image.

    Accepts either a bare base64 payload or a browser-style data URL
    (``data:image/png;base64,<payload>``); the header of a data URL is
    discarded before decoding.

    Args:
        img_b64: Base64 text (or bytes), optionally prefixed by a data-URL
            header when given as text.

    Returns:
        The decoded image converted to RGB mode.

    Raises:
        ValueError: If the payload is not valid base64 (``binascii.Error``
            is a ``ValueError`` subclass).
        OSError: If Pillow cannot read the decoded bytes as an image.
    """
    # A comma is not part of the base64 alphabet, so its presence means the
    # caller sent a data URL; keep only the part after the header.
    if isinstance(img_b64, str) and ',' in img_b64:
        img_b64 = img_b64.split(',', 1)[1]
    raw = base64.b64decode(img_b64)
    # Imported lazily so the module can be imported without Pillow installed.
    from PIL import Image
    return Image.open(io.BytesIO(raw)).convert('RGB')
16
+
def encode_chart():
    """Return the current matplotlib figure as a base64-encoded PNG string.

    The active pyplot figure is written to an in-memory buffer at 100 dpi
    with a tight bounding box, after which every open figure is closed.
    """
    png_buffer = io.BytesIO()
    plt.savefig(png_buffer, format='png', dpi=100, bbox_inches='tight')
    plt.close('all')
    png_bytes = png_buffer.getvalue()
    encoded = base64.b64encode(png_bytes)
    return encoded.decode()
22
+
23
+ # ── Health check ──────────────────────────────────────────────
@app.get("/")
async def root():
    """Serve a small HTML landing page listing the endpoints of this server."""
    return HTMLResponse(
        "<h2>DS Agent ML Server Running ✅</h2>"
        "<p>Endpoints: /ping /run /xray /ecg /skin /ocr /upload_chunk "
        "/merge_chunks /cleanup_chunks/{upload_id} /nlp_advanced /custom_dl "
        "/cv/classify /quantum/simulate</p>"
    )
27
+
@app.get("/ping")
async def ping():
    """Liveness probe: return a fixed status/message JSON object."""
    reply = dict(status="ok", message="HuggingFace DS Agent ML Server Ready!")
    return reply
31
+
32
+ # ── Deep Learning ─────────────────────────────────────────────
@app.post("/run")
async def api_run(request: Request):
    """Train a small dense Keras network on a CSV payload and report test metrics.

    Request JSON:
        csv_data: CSV text with a header row (required).
        params: object with ``target`` (name of the column to predict,
            required), ``epochs`` (default 10) and ``layers`` (hidden layer
            widths, default ``[64, 32]``).

    Rows containing missing values are dropped and object-typed feature
    columns are label-encoded.  The target is treated as classification when
    it is object-typed or has at most 20 distinct values, otherwise as
    regression.

    Returns:
        On success a dict with the task type, test metrics, number of epochs
        run, row counts and a base64 PNG of the training curves.  On failure
        a ``JSONResponse`` with an ``error`` field (400 for bad input, 500
        for anything unexpected).
    """
    try:
        payload = await request.json()
        import tensorflow as tf
        from sklearn.model_selection import train_test_split
        from sklearn.preprocessing import LabelEncoder, StandardScaler

        csv_data = payload.get('csv_data', '')
        params = payload.get('params', {})
        if not csv_data:
            return JSONResponse({'error': 'csv_data nahi mila!'}, status_code=400)
        df = pd.read_csv(io.StringIO(csv_data))

        target = params.get('target', '')
        epochs = int(params.get('epochs', 10))
        layers = params.get('layers', [64, 32])

        if not target or target not in df.columns:
            return JSONResponse({'error': f'Target "{target}" nahi mila! Available: {list(df.columns)}'}, status_code=400)

        df_clean = df.dropna()
        # A train/test split needs at least one row on each side.
        if len(df_clean) < 2:
            return JSONResponse({'error': 'Not enough complete rows after dropping missing values!'}, status_code=400)

        y = df_clean[target]
        X = df_clean.drop(columns=[target])

        for c in X.select_dtypes(include='object').columns:
            X[c] = LabelEncoder().fit_transform(X[c].astype(str))

        is_text_target = y.dtype == object
        task_type = 'classification' if (is_text_target or y.nunique() <= 20) else 'regression'
        if task_type == 'classification':
            # Always remap labels to 0..k-1 and count the classes, for numeric
            # targets too; otherwise a numeric target with 3+ classes would be
            # trained with a single sigmoid unit and binary cross-entropy.
            le_y = LabelEncoder()
            y = le_y.fit_transform(y.astype(str) if is_text_target else y)
            num_classes = len(le_y.classes_)
        else:
            y = y.values
            num_classes = 1

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
        # Fit the scaler on the training rows only, then apply it to the test rows.
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

        model = tf.keras.Sequential()
        model.add(tf.keras.layers.Input(shape=(X_train.shape[1],)))
        for units in layers:
            model.add(tf.keras.layers.Dense(int(units), activation='relu'))
            model.add(tf.keras.layers.Dropout(0.2))

        if task_type == 'classification' and num_classes > 2:
            model.add(tf.keras.layers.Dense(num_classes, activation='softmax'))
            model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
        elif task_type == 'classification':
            model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
            model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        else:
            model.add(tf.keras.layers.Dense(1))
            model.compile(optimizer='adam', loss='mse', metrics=['mae'])

        history = model.fit(
            X_train, y_train, epochs=epochs, validation_split=0.1, verbose=0,
            callbacks=[tf.keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True)],
        )
        # return_dict gives metric names straight from Keras instead of
        # relying on model.metrics_names lining up with the result list.
        results = model.evaluate(X_test, y_test, verbose=0, return_dict=True)

        fig, axes = plt.subplots(1, 2, figsize=(10, 4))
        axes[0].plot(history.history['loss'], color='#5b5ef4', linewidth=2, label='Train')
        axes[0].plot(history.history['val_loss'], color='#ef4444', linewidth=2, label='Val')
        axes[0].set_title('Loss', fontweight='bold')
        axes[0].legend()
        mk = 'accuracy' if task_type == 'classification' else 'mae'
        if mk in history.history:
            axes[1].plot(history.history[mk], color='#059669', linewidth=2, label='Train')
        if 'val_' + mk in history.history:
            axes[1].plot(history.history['val_' + mk], color='#f59e0b', linewidth=2, label='Val')
        axes[1].set_title(mk.upper(), fontweight='bold')
        axes[1].legend()
        plt.tight_layout()

        return {
            'success': True, 'task': task_type, 'target': target,
            'epochs_ran': len(history.history['loss']),
            'metrics': {name: round(float(value), 4) for name, value in results.items()},
            'chart': encode_chart(), 'train_rows': len(X_train), 'test_rows': len(X_test),
            'message': f'Deep Learning complete! {task_type.title()} model trained.'
        }
    except Exception as e:
        import traceback
        # Release any figure created before the failure.
        plt.close('all')
        # NOTE(review): the traceback tail is returned to the client, which
        # exposes internal paths; consider logging it server-side instead.
        return JSONResponse({'error': str(e), 'trace': traceback.format_exc()[-500:]}, status_code=500)
117
+
118
+ # ── X-Ray ─────────────────────────────────────────────────────
# Lazily built classifier shared by all /xray requests.
_XRAY_MODEL = None


def _get_xray_model():
    """Build the EfficientNetB0-based 5-class model once and reuse it afterwards."""
    global _XRAY_MODEL
    if _XRAY_MODEL is None:
        import tensorflow as tf
        base_model = tf.keras.applications.EfficientNetB0(
            weights='imagenet', include_top=False, input_shape=(224, 224, 3))
        x = tf.keras.layers.GlobalAveragePooling2D()(base_model.output)
        x = tf.keras.layers.Dense(128, activation='relu')(x)
        output = tf.keras.layers.Dense(5, activation='softmax')(x)
        _XRAY_MODEL = tf.keras.Model(base_model.input, output)
    return _XRAY_MODEL


@app.post("/xray")
async def api_xray(request: Request):
    """Score a chest X-ray image against five condition labels.

    Request JSON:
        image: base64-encoded image (required).

    The image is resized to 224x224 and passed through an EfficientNetB0
    backbone with a freshly created dense head.  The Pneumonia score is then
    boosted when more than half of the normalised pixel values are below 0.3,
    and all scores are renormalised to percentages.

    NOTE(review): no trained weights are loaded for the classification head,
    so the scores come from randomly initialised layers plus the darkness
    heuristic.  They should not be presented as a diagnosis.

    Returns:
        On success a dict with the top condition, its confidence, a severity
        bucket, all scores, a chart (base64 PNG) and a recommendation.  On
        failure a ``JSONResponse`` with an ``error`` field.
    """
    try:
        payload = await request.json()
        img_b64 = payload.get('image', '')
        if not img_b64:
            return JSONResponse({'error': 'Image nahi mili!'}, status_code=400)
        img = decode_image(img_b64).resize((224, 224))
        # Normalised copy used for the darkness heuristic and for display.
        img_array = np.array(img) / 255.0
        # Keras EfficientNet models rescale internally and expect raw pixel
        # values in [0, 255], so the model gets the unnormalised image.
        img_batch = np.expand_dims(np.asarray(img, dtype=np.float32), 0)
        model = _get_xray_model()
        preds = [float(p) for p in model.predict(img_batch, verbose=0)[0]]
        conditions = ['Normal', 'Pneumonia', 'Pleural Effusion', 'Cardiomegaly', 'Fracture']
        if float(np.mean(img_array < 0.3)) > 0.5:
            preds[1] = min(preds[1] + 0.3, 0.9)
        total = sum(preds)
        scores = {c: round(float(s / total) * 100, 1) for c, s in zip(conditions, preds)}
        top = max(scores, key=scores.get)
        conf = scores[top]
        recs = {'Normal': 'X-Ray normal hai.', 'Pneumonia': 'Pneumonia! Doctor se milein.',
                'Pleural Effusion': 'Pulmonologist needed.', 'Cardiomegaly': 'Cardiologist + Echo.',
                'Fracture': 'Orthopedic + CT scan.'}
        fig, axes = plt.subplots(1, 2, figsize=(11, 4))
        colors = ['#059669' if c == 'Normal' else '#ef4444' for c in conditions]
        values = [scores[c] for c in conditions]
        bars = axes[0].barh(conditions, values, color=colors, alpha=0.8)
        axes[0].set_xlabel('Confidence (%)')
        axes[0].set_title('X-Ray Analysis', fontweight='bold')
        for bar, val in zip(bars, values):
            axes[0].text(bar.get_width() + 1, bar.get_y() + bar.get_height() / 2,
                         f'{val}%', va='center', fontsize=9)
        axes[1].imshow(img_array)
        axes[1].set_title(f'Detected: {top}', fontweight='bold')
        axes[1].axis('off')
        plt.tight_layout()
        return {'success': True, 'top_condition': top, 'confidence': conf,
                'severity': 'High' if conf > 70 else 'Medium' if conf > 45 else 'Low',
                'all_conditions': scores, 'chart': encode_chart(),
                'recommendation': recs.get(top, 'Doctor se consult.'),
                'message': f'X-Ray: {top} ({conf}% confidence)'}
    except Exception as e:
        import traceback
        # Release any figure created before the failure.
        plt.close('all')
        return JSONResponse({'error': str(e), 'trace': traceback.format_exc()[-400:]}, status_code=500)
160
+
161
+ # ── ECG ───────────────────────────────────────────────────────
@app.post("/ecg")
async def api_ecg(request: Request):
    """Score an ECG image against six rhythm labels using pixel statistics.

    Reads a base64 image from the ``image`` field, resizes it to 224x224 and
    averages the colour channels into a grayscale array.  Fixed base weights
    per label are bumped according to the grayscale variance and the
    fractions of dark (< 0.2) and bright (> 0.7) pixels, then normalised to
    percentages.

    Returns a dict with the top label, its confidence, all scores, a chart
    (base64 PNG), a recommendation and an ``is_emergency`` flag, or a
    ``JSONResponse`` with an ``error`` field on failure.
    """
    try:
        body = await request.json()
        encoded = body.get('image', '')
        if not encoded:
            return JSONResponse({'error': 'ECG nahi mili!'}, status_code=400)

        picture = decode_image(encoded).resize((224, 224))
        img_array = np.array(picture) / 255.0
        gray = np.mean(img_array, axis=2)

        conditions = ['Normal Sinus Rhythm', 'Atrial Fibrillation', 'ST Elevation (MI)',
                      'Bradycardia', 'Tachycardia', 'Left Bundle Branch Block']
        weights = [0.4, 0.15, 0.1, 0.1, 0.1, 0.15]

        variance = float(np.var(gray))
        dark_fraction = float(np.mean(gray < 0.2))
        bright_fraction = float(np.mean(gray > 0.7))
        if variance > 0.05:
            weights[1] += 0.2
        if dark_fraction > 0.3:
            weights[2] += 0.25
        if bright_fraction < 0.05:
            weights[3] += 0.2
        if bright_fraction > 0.2:
            weights[4] += 0.2

        weight_sum = sum(weights)
        result = {}
        for label, weight in zip(conditions, weights):
            result[label] = round(weight / weight_sum * 100, 1)
        top = max(result, key=result.get)
        conf = result[top]

        recs = {
            'Normal Sinus Rhythm': 'ECG normal.',
            'Atrial Fibrillation': 'AFib! Cardiologist.',
            'ST Elevation (MI)': 'STEMI EMERGENCY!',
            'Bradycardia': 'Heart slow.',
            'Tachycardia': 'Heart fast.',
            'Left Bundle Branch Block': 'LBBB. Echo.',
        }

        fig, axes = plt.subplots(1, 2, figsize=(11, 4))
        bar_axis, image_axis = axes[0], axes[1]
        colors = []
        for label in conditions:
            colors.append('#059669' if label == 'Normal Sinus Rhythm' else '#ef4444')
        bar_axis.barh(conditions, list(result.values()), color=colors, alpha=0.8)
        bar_axis.set_xlabel('Confidence (%)')
        bar_axis.set_title('ECG Analysis', fontweight='bold')
        image_axis.imshow(img_array)
        image_axis.set_title(f'{top}', fontweight='bold')
        image_axis.axis('off')
        plt.tight_layout()

        return {
            'success': True,
            'top_condition': top,
            'confidence': conf,
            'all_conditions': result,
            'chart': encode_chart(),
            'recommendation': recs.get(top, 'Doctor se consult.'),
            'is_emergency': top == 'ST Elevation (MI)',
            'message': f'ECG: {top} ({conf}% confidence)',
        }
    except Exception as e:
        import traceback
        return JSONResponse({'error': str(e), 'trace': traceback.format_exc()[-400:]}, status_code=500)
198
+
199
+ # ── Skin ──────────────────────────────────────────────────────
@app.post("/skin")
async def api_skin(request: Request):
    """Score a skin/wound photo against six labels using colour heuristics.

    Request JSON:
        image: base64-encoded image (required).

    The image is resized to 224x224 and normalised to [0, 1].  Starting from
    a base weight on 'Normal Skin', weights are added when the red channel
    dominates green, when the image is dark overall, or when a large share
    of pixels is strongly red; the weights are then normalised to
    percentages.

    Returns:
        On success a dict with the top label, its confidence, a severity
        bucket, all scores, a chart (base64 PNG) and a recommendation.  On
        failure a ``JSONResponse`` with an ``error`` field.
    """
    try:
        payload = await request.json()
        img_b64 = payload.get('image', '')
        if not img_b64:
            return JSONResponse({'error': 'Image nahi mili!'}, status_code=400)
        img = decode_image(img_b64).resize((224, 224))
        img_array = np.array(img) / 255.0
        r, g = img_array[:, :, 0], img_array[:, :, 1]
        scores = {'Normal Skin': 0.35, 'Wound/Laceration': 0.0, 'Infection/Cellulitis': 0.0,
                  'Burn Injury': 0.0, 'Dermatitis/Rash': 0.0, 'Pressure Ulcer': 0.0}
        if float(np.mean(r) - np.mean(g)) > 0.1:
            scores['Infection/Cellulitis'] += 0.3
            scores['Dermatitis/Rash'] += 0.2
        if float(1 - np.mean(img_array)) > 0.5:
            scores['Wound/Laceration'] += 0.35
            scores['Pressure Ulcer'] += 0.2
        if float(np.mean(r > 0.6)) > 0.3:
            scores['Burn Injury'] += 0.35
        total = sum(scores.values()) or 1
        result = {k: round(v / total * 100, 1) for k, v in scores.items()}
        top = max(result, key=result.get)
        conf = result[top]
        if top == 'Normal Skin':
            severity = 'None'
        else:
            severity = 'Severe' if conf > 60 else 'Moderate' if conf > 35 else 'Mild'
        recs = {'Normal Skin': 'Normal.', 'Wound/Laceration': 'Wound! Clean + bandage.',
                'Infection/Cellulitis': 'Infection! Antibiotics.', 'Burn Injury': 'Burn! Cool water.',
                'Dermatitis/Rash': 'Rash. Dermatologist.', 'Pressure Ulcer': 'Ulcer. Wound care.'}
        fig, axes = plt.subplots(1, 2, figsize=(11, 4))
        colors = ['#059669' if c == 'Normal Skin' else '#ef4444' for c in result]
        axes[0].barh(list(result.keys()), list(result.values()), color=colors, alpha=0.8)
        axes[0].set_xlabel('Confidence (%)')
        axes[0].set_title('Skin/Wound', fontweight='bold')
        axes[1].imshow(img_array)
        axes[1].set_title(f'{top}\n{severity}', fontweight='bold')
        axes[1].axis('off')
        plt.tight_layout()
        return {'success': True, 'top_condition': top, 'confidence': conf,
                'severity': severity, 'all_conditions': result, 'chart': encode_chart(),
                'recommendation': recs.get(top, 'Doctor se consult.'),
                # The separator was a mis-encoded em dash in the source text.
                'message': f'Skin: {top} — {severity}'}
    except Exception as e:
        import traceback
        # Release any figure created before the failure.
        plt.close('all')
        return JSONResponse({'error': str(e), 'trace': traceback.format_exc()[-400:]}, status_code=500)
235
+
236
+ # ── OCR ───────────────────────────────────────────────────────
def _has_medical_keyword(text_lower, keyword):
    """Return True when *keyword* occurs in the already lower-cased text.

    Abbreviations of up to three characters ("mg", "BP", "Hb", ...) must not
    be surrounded by other letters, so "mg" matches "500mg" but not "img".
    Longer keywords keep plain substring matching so plurals still hit.
    """
    import re
    kw = keyword.lower()
    if len(kw) <= 3:
        pattern = r'(?<![a-z])' + re.escape(kw) + r'(?![a-z])'
        return re.search(pattern, text_lower) is not None
    return kw in text_lower


@app.post("/ocr")
async def api_ocr(request: Request):
    """Run OCR on a medical report image and flag known keyword groups.

    Request JSON:
        image: base64-encoded image (required).

    Text is extracted with pytesseract when it is available; otherwise the
    extracted text is the placeholder '[OCR unavailable]' and no keywords
    are reported.  Image quality is rated 'Good' or 'Poor' from the mean and
    standard deviation of the normalised pixels.

    Returns:
        On success a dict with the extracted text (first 2000 characters),
        up to 20 non-trivial lines, the keywords found per category, the
        quality rating, a critical-values flag and a chart (base64 PNG).
        On failure a ``JSONResponse`` with an ``error`` field.
    """
    try:
        payload = await request.json()
        img_b64 = payload.get('image', '')
        if not img_b64:
            return JSONResponse({'error': 'Image nahi mili!'}, status_code=400)
        img = decode_image(img_b64)
        img_array = np.array(img.resize((800, 600))) / 255.0

        # OCR is best-effort: a missing pytesseract package or tesseract
        # binary degrades to a placeholder instead of failing the request.
        ocr_ok = True
        try:
            import pytesseract
            extracted_text = pytesseract.image_to_string(img)
        except Exception:
            ocr_ok = False
            extracted_text = '[OCR unavailable]'

        medical_keywords = {
            'vitals': ['BP', 'pulse', 'temperature', 'SpO2'],
            'blood_test': ['Hb', 'WBC', 'RBC', 'platelets', 'glucose', 'creatinine'],
            'diagnosis': ['diagnosis', 'impression', 'findings', 'conclusion'],
            'medication': ['tablet', 'capsule', 'mg', 'injection', 'syrup'],
            'critical': ['URGENT', 'CRITICAL', 'EMERGENCY', 'abnormal'],
        }
        found = {}
        if ocr_ok:
            tl = extracted_text.lower()
            for cat, kws in medical_keywords.items():
                hits = [k for k in kws if _has_medical_keyword(tl, k)]
                if hits:
                    found[cat] = hits

        bright_enough = float(np.mean(img_array)) > 0.4
        enough_contrast = float(np.std(img_array)) > 0.1
        quality = 'Good' if bright_enough and enough_contrast else 'Poor'

        fig, axes = plt.subplots(1, 2, figsize=(11, 5))
        axes[0].imshow(img_array)
        axes[0].set_title('Medical Report', fontweight='bold')
        axes[0].axis('off')
        if found:
            axes[1].bar(list(found.keys()), [len(v) for v in found.values()], color='#2563eb', alpha=0.8)
            axes[1].set_title('Keywords Found', fontweight='bold')
        else:
            axes[1].text(0.5, 0.5, 'No keywords', ha='center', va='center', transform=axes[1].transAxes)
        plt.tight_layout()

        # Keep at most 20 lines that are longer than three characters.
        lines = [l.strip() for l in extracted_text.split('\n') if len(l.strip()) > 3][:20]
        return {'success': True, 'extracted_text': extracted_text[:2000],
                'summary_lines': lines, 'keywords_found': found, 'image_quality': quality,
                'has_critical_values': bool(found.get('critical')), 'chart': encode_chart(),
                'message': f'OCR done! Quality:{quality}'}
    except Exception as e:
        import traceback
        # Release any figure created before the failure.
        plt.close('all')
        return JSONResponse({'error': str(e), 'trace': traceback.format_exc()[-400:]}, status_code=500)
280
+
281
+
282
+ # ── Chunked Upload (500MB support) ───────────────────────────
283
+ import os, shutil, tempfile
284
+
# Directory where in-progress chunked uploads are staged; the upload
# endpoints create one sub-directory per upload_id inside it.
CHUNK_DIR = "/tmp/chunks"
# Created at import time so the first uploaded chunk has somewhere to go.
os.makedirs(CHUNK_DIR, exist_ok=True)
287
+
@app.post("/upload_chunk")
async def upload_chunk(request: Request):
    """Receive one chunk of a large file and store it under CHUNK_DIR.

    Multipart form fields:
        upload_id: client-chosen identifier of the upload (required).
        chunk_index: zero-based index of this chunk (default 0).
        total_chunks: number of chunks in the whole upload (default 1).
        file: the chunk contents (required).

    Returns:
        On success a dict with the chunk index, the number of chunks stored
        so far, the expected total and a ``complete`` flag.  On failure a
        ``JSONResponse`` with an ``error`` field (400 for bad input, 500 for
        anything unexpected).
    """
    try:
        form = await request.form()
        upload_id = form.get("upload_id", "")
        chunk_file = form.get("file")

        if not upload_id:
            return JSONResponse({"error": "upload_id missing"}, status_code=400)
        # upload_id becomes a directory name, so reject anything that could
        # escape CHUNK_DIR (path separators, "." or "..").
        if (not isinstance(upload_id, str) or upload_id in (".", "..")
                or os.path.basename(upload_id) != upload_id):
            return JSONResponse({"error": "upload_id invalid"}, status_code=400)

        try:
            chunk_index = int(form.get("chunk_index", 0))
            total_chunks = int(form.get("total_chunks", 1))
        except (TypeError, ValueError):
            return JSONResponse({"error": "chunk_index/total_chunks must be integers"}, status_code=400)
        if total_chunks < 1 or not 0 <= chunk_index < total_chunks:
            return JSONResponse({"error": "chunk_index out of range"}, status_code=400)

        # A plain text field (or no field at all) has no read() method.
        if chunk_file is None or not hasattr(chunk_file, "read"):
            return JSONResponse({"error": "file missing"}, status_code=400)

        # Save chunk to disk
        chunk_dir = os.path.join(CHUNK_DIR, upload_id)
        os.makedirs(chunk_dir, exist_ok=True)
        chunk_path = os.path.join(chunk_dir, f"chunk_{chunk_index:05d}")

        chunk_bytes = await chunk_file.read()
        with open(chunk_path, "wb") as f:
            f.write(chunk_bytes)

        # Count only chunk files so stray entries cannot mark the upload complete.
        received = sum(1 for name in os.listdir(chunk_dir) if name.startswith("chunk_"))
        print(f"[CHUNK] upload_id={upload_id} chunk={chunk_index+1}/{total_chunks} received={received}")

        return {
            "success": True,
            "chunk_index": chunk_index,
            "received": received,
            "total_chunks": total_chunks,
            "complete": received >= total_chunks
        }
    except Exception as e:
        import traceback
        return JSONResponse({"error": str(e), "trace": traceback.format_exc()[-400:]}, status_code=500)
325
+
326
+
@app.post("/merge_chunks")
async def merge_chunks(request: Request):
    """Merge the stored chunks of an upload, parse the file and summarise it.

    Request JSON:
        upload_id: identifier used when the chunks were uploaded (required).
        filename: original file name; its extension selects the parser
            (csv, xlsx/xls, parquet, json, tsv).  Defaults to "data.csv".
        total_chunks: number of chunks expected (default 1).

    CSV files are read in pieces of 100,000 rows and reading stops once at
    least 1,000,000 rows are loaded, so ``total_rows`` is capped for larger
    CSVs.  The merged temporary file is always removed; the chunk directory
    is removed on success and for unsupported formats.

    Returns:
        On success a dict with shape, column lists, null counts, dtypes and
        a CSV sample of up to 50,000 rows.  On failure a ``JSONResponse``
        with an ``error`` field (400/404 for bad input, 500 otherwise).
    """
    try:
        payload = await request.json()
        upload_id = payload.get("upload_id", "")
        filename = payload.get("filename", "data.csv")
        total_chunks = int(payload.get("total_chunks", 1))

        # upload_id is used as a directory name: an empty value would resolve
        # to CHUNK_DIR itself and ".." or a path separator would escape it.
        if (not isinstance(upload_id, str) or not upload_id or upload_id in (".", "..")
                or os.path.basename(upload_id) != upload_id):
            return JSONResponse({"error": "upload_id invalid"}, status_code=400)

        chunk_dir = os.path.join(CHUNK_DIR, upload_id)
        if not os.path.isdir(chunk_dir):
            return JSONResponse({"error": "upload_id not found — chunks missing!"}, status_code=404)

        # Check for the exact files that will be merged, not just a file count.
        chunk_paths = [os.path.join(chunk_dir, f"chunk_{i:05d}") for i in range(total_chunks)]
        received = sum(1 for path in chunk_paths if os.path.isfile(path))
        if received < total_chunks:
            return JSONResponse({"error": f"Sirf {received}/{total_chunks} chunks mile — pehle sab upload karo!"}, status_code=400)

        merged_path = os.path.join(CHUNK_DIR, f"{upload_id}_merged")
        try:
            # Merge all chunks in order
            with open(merged_path, "wb") as out_f:
                for chunk_path in chunk_paths:
                    with open(chunk_path, "rb") as cf:
                        shutil.copyfileobj(cf, out_f)

            size_mb = os.path.getsize(merged_path) / (1024 * 1024)
            print(f"[MERGE] upload_id={upload_id} size={size_mb:.1f}MB filename={filename}")

            # Parse file
            ext = filename.rsplit(".", 1)[-1].lower()
            if ext == "csv":
                # Chunked read, stopping once about 1M rows are in memory.
                frames = []
                rows_loaded = 0
                for frame in pd.read_csv(merged_path, chunksize=100000):
                    frames.append(frame)
                    rows_loaded += len(frame)
                    if rows_loaded >= 1000000:
                        break
                if frames:
                    df = pd.concat(frames, ignore_index=True)
                else:
                    # No pieces yielded (e.g. header-only file): read directly.
                    df = pd.read_csv(merged_path)
            elif ext in ("xlsx", "xls"):
                df = pd.read_excel(merged_path)
            elif ext == "parquet":
                df = pd.read_parquet(merged_path)
            elif ext == "json":
                df = pd.read_json(merged_path)
            elif ext == "tsv":
                df = pd.read_csv(merged_path, sep="\t")
            else:
                shutil.rmtree(chunk_dir, ignore_errors=True)
                return JSONResponse({"error": f"Format .{ext} support nahi!"}, status_code=400)
        finally:
            # Never leave the merged copy on disk, including when parsing fails.
            if os.path.exists(merged_path):
                os.remove(merged_path)

        total_rows, total_cols = df.shape
        # Sample for Render (max 50k rows to avoid timeout)
        sample_df = df.head(50000)
        sample_csv = sample_df.to_csv(index=False)

        # Stats
        numeric_cols = df.select_dtypes(include="number").columns.tolist()
        cat_cols = df.select_dtypes(include="object").columns.tolist()
        null_counts = df.isnull().sum()
        null_info = {col: int(v) for col, v in null_counts[null_counts > 0].items()}

        # The file parsed successfully, so the chunks are no longer needed.
        shutil.rmtree(chunk_dir, ignore_errors=True)

        return {
            "success": True,
            "filename": filename,
            "total_rows": total_rows,
            "total_cols": total_cols,
            "size_mb": round(size_mb, 2),
            "columns": list(df.columns),
            "numeric_cols": numeric_cols,
            "categorical_cols": cat_cols,
            "null_info": null_info,
            "sample_rows": min(50000, total_rows),
            "sample_csv": sample_csv,
            "dtypes": {col: str(dtype) for col, dtype in df.dtypes.items()},
            "message": f"✅ {filename} loaded! {total_rows:,} rows × {total_cols} cols ({size_mb:.1f}MB)"
        }

    except Exception as e:
        import traceback
        return JSONResponse({"error": str(e), "trace": traceback.format_exc()[-500:]}, status_code=500)
415
+
416
+
@app.delete("/cleanup_chunks/{upload_id}")
async def cleanup_chunks(upload_id: str):
    """Delete the chunk directory of an abandoned upload.

    Args:
        upload_id: identifier used when the chunks were uploaded.

    Returns:
        A success dict whether or not a directory existed, or a 400
        ``JSONResponse`` when ``upload_id`` is not a plain directory name.
    """
    # The directory is removed recursively, so refuse any id that could
    # resolve outside CHUNK_DIR ("..", ".", or anything with a separator).
    if not upload_id or upload_id in (".", "..") or os.path.basename(upload_id) != upload_id:
        return JSONResponse({"error": "upload_id invalid"}, status_code=400)
    chunk_dir = os.path.join(CHUNK_DIR, upload_id)
    if os.path.isdir(chunk_dir):
        shutil.rmtree(chunk_dir, ignore_errors=True)
    return {"success": True, "message": "Cleanup done"}
424
+
425
+
426
+ # ── Advanced NLP via Transformers ─────────────────────────────
@app.post("/nlp_advanced")
async def nlp_advanced(request: Request):
    """Run Hugging Face pipelines (sentiment, NER, summarisation) on texts.

    Request JSON:
        texts: list of strings (a single string is treated as one text).
        task: 'sentiment' (default), 'ner', 'summary' or 'all'.

    Limits applied per task: sentiment uses the first 50 texts and returns
    the first 20 predictions; NER uses the first 10 texts and returns at
    most 30 entities; summarisation uses up to 3 texts longer than 30 words,
    each cut to 1000 characters.

    NER and summarisation are best-effort: if one of them fails, its result
    is an empty list and the reason is reported under ``ner_error`` /
    ``summaries_error`` instead of failing the whole request.

    Returns:
        On success a dict with the task, the number of texts received and
        the per-task results.  On failure a ``JSONResponse`` with an
        ``error`` field.
    """
    try:
        payload = await request.json()
        texts = payload.get('texts', [])
        task = payload.get('task', 'sentiment')

        if isinstance(texts, str):
            texts = [texts]
        if not texts:
            return JSONResponse({'error': 'texts list empty!'}, status_code=400)
        # The pipelines and the slicing below expect plain strings.
        texts = [t if isinstance(t, str) else str(t) for t in texts]

        from transformers import pipeline

        results = {}

        if task in ('sentiment', 'all'):
            pipe = pipeline('sentiment-analysis', model='distilbert-base-uncased-finetuned-sst-2-english', device=-1)
            batch = texts[:50]
            preds = pipe(batch, truncation=True, max_length=512)
            sent_results = [{'text': t[:100], 'label': p['label'], 'score': round(p['score'], 4)}
                            for t, p in zip(batch, preds)]
            pos = sum(1 for p in preds if p['label'] == 'POSITIVE')
            neg = len(preds) - pos
            results['sentiment'] = {
                'predictions': sent_results[:20],
                'positive_count': pos, 'negative_count': neg,
                'positive_pct': round(pos / len(preds) * 100, 1),
                'negative_pct': round(neg / len(preds) * 100, 1)
            }

        if task in ('ner', 'all'):
            try:
                ner_pipe = pipeline('ner', model='dbmdz/bert-large-cased-finetuned-conll03-english',
                                    aggregation_strategy='simple', device=-1)
                ner_batch = texts[:10]
                ner_results = ner_pipe(ner_batch, truncation=True, max_length=512)
                entities = []
                for t, ents in zip(ner_batch, ner_results):
                    for e in ents:
                        entities.append({'text': t[:80], 'entity': e['entity_group'],
                                         'word': e['word'], 'score': round(float(e['score']), 3)})
                results['ner'] = entities[:30]
            except Exception as ner_exc:
                results['ner'] = []
                results['ner_error'] = str(ner_exc)

        if task in ('summary', 'all'):
            try:
                sum_pipe = pipeline('summarization', model='sshleifer/distilbart-cnn-6-6', device=-1)
                long_texts = [t for t in texts if len(t.split()) > 30][:3]
                summaries = []
                for t in long_texts:
                    s = sum_pipe(t[:1000], max_length=80, min_length=20, do_sample=False)
                    summaries.append({'original': t[:200], 'summary': s[0]['summary_text']})
                results['summaries'] = summaries
            except Exception as sum_exc:
                results['summaries'] = []
                results['summaries_error'] = str(sum_exc)

        return {
            'success': True,
            'task': task,
            'texts_processed': len(texts),
            'results': results,
            'message': f'Advanced NLP done! {len(texts)} texts processed via Transformers'
        }
    except Exception as e:
        import traceback
        return JSONResponse({'error': str(e), 'trace': traceback.format_exc()[-500:]}, status_code=500)
492
+
493
+
494
+ # ── Custom Deep Learning Architecture ──────────────────────────
@app.post("/custom_dl")
async def custom_deep_learning(request: Request):
    """Train a configurable dense Keras network on a base64-encoded CSV.

    Request JSON:
        data: base64-encoded CSV with a header row (required).
        target: name of the column to predict (required).
        architecture: 'wide', 'deep' or 'residual_style' select a preset
            list of layer widths; any other value (default 'auto') uses
            ``layers``.  The presets are plain dense stacks.
        layers: hidden layer widths (default ``[128, 64, 32]``).
        epochs: maximum epochs (default 50; early stopping with patience 10).
        dropout: dropout rate after each hidden layer (default 0.3).
        optimizer: Keras optimizer name (default 'adam').
        task: 'classification', 'regression' or 'auto' (default).  'auto'
            picks classification for object-typed targets or targets with at
            most 15 distinct values.

    Returns:
        On success a dict describing the trained model (task, layers,
        parameter count, losses, final test metric, chart as base64 PNG).
        On failure a ``JSONResponse`` with an ``error`` field.
    """
    try:
        payload = await request.json()
        data_b64 = payload.get('data', '')
        target = payload.get('target', '')
        architecture = payload.get('architecture', 'auto')
        layers = payload.get('layers', [128, 64, 32])
        epochs = int(payload.get('epochs', 50))
        dropout = float(payload.get('dropout', 0.3))
        optimizer = payload.get('optimizer', 'adam')
        task = payload.get('task', 'auto')

        if not data_b64:
            return JSONResponse({'error': 'data (base64 CSV) required!'}, status_code=400)
        df = pd.read_csv(io.BytesIO(base64.b64decode(data_b64)))

        if target not in df.columns:
            return JSONResponse({'error': f'Target "{target}" not found!'}, status_code=400)

        from sklearn.preprocessing import LabelEncoder, StandardScaler
        from sklearn.model_selection import train_test_split
        import tensorflow as tf

        df = df.dropna()
        # A train/test split needs at least one row on each side.
        if len(df) < 2:
            return JSONResponse({'error': 'Not enough complete rows after dropping missing values!'}, status_code=400)

        y_raw = df[target]
        X = df.drop(columns=[target])
        for c in X.select_dtypes(include='object').columns:
            X[c] = LabelEncoder().fit_transform(X[c].astype(str))

        if task == 'auto':
            task = 'classification' if (y_raw.dtype == object or y_raw.nunique() <= 15) else 'regression'

        if y_raw.dtype == object or task == 'classification':
            y = LabelEncoder().fit_transform(y_raw.astype(str))
        else:
            y = y_raw.values
        n_classes = len(np.unique(y))

        # Split first and fit the scaler on the training rows only, so test
        # statistics do not leak into training.
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

        # Preset architectures override the caller-provided layer widths.
        presets = {
            'wide': [512, 256, 128, 64],
            'deep': [128, 128, 64, 64, 32, 32],
            'residual_style': [256, 256, 128, 128, 64],
        }
        layers = [int(units) for units in presets.get(architecture, layers)]

        model = tf.keras.Sequential()
        model.add(tf.keras.layers.Input(shape=(X_train.shape[1],)))
        layer_descs = []
        for i, units in enumerate(layers):
            model.add(tf.keras.layers.Dense(units, activation='relu',
                                            kernel_regularizer=tf.keras.regularizers.l2(0.001)))
            parts = [f'Dense({units})']
            if dropout > 0:
                model.add(tf.keras.layers.Dropout(dropout))
                parts.append('Drop')
            if i == 0:
                model.add(tf.keras.layers.BatchNormalization())
                parts.append('BN')
            layer_descs.append('+'.join(parts))

        # Output layer
        if task == 'classification':
            if n_classes <= 2:
                # One sigmoid unit covers binary targets and the degenerate
                # single-class case.
                model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
                model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
            else:
                model.add(tf.keras.layers.Dense(n_classes, activation='softmax'))
                model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
        else:
            model.add(tf.keras.layers.Dense(1, activation='linear'))
            model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

        # Train
        history = model.fit(
            X_train, y_train, epochs=epochs, batch_size=32,
            validation_split=0.15, verbose=0,
            callbacks=[tf.keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)],
        )

        # Evaluate: index 1 is the single compiled metric (accuracy or MAE).
        test_results = model.evaluate(X_test, y_test, verbose=0)
        final_metric = float(test_results[1])

        # Chart
        fig, axes = plt.subplots(1, 2, figsize=(13, 5))
        axes[0].plot(history.history['loss'], color='#5b5ef4', linewidth=2, label='Train Loss')
        axes[0].plot(history.history['val_loss'], color='#ef4444', linewidth=2, label='Val Loss')
        axes[0].set_title('Custom DL — Loss Curve', fontweight='bold')
        axes[0].legend()
        axes[0].set_xlabel('Epoch')

        metric_key = 'accuracy' if task == 'classification' else 'mae'
        if metric_key in history.history:
            axes[1].plot(history.history[metric_key], color='#059669', linewidth=2, label=f'Train {metric_key}')
        if f'val_{metric_key}' in history.history:
            axes[1].plot(history.history[f'val_{metric_key}'], color='#f59e0b', linewidth=2, label=f'Val {metric_key}')
        axes[1].set_title(f'Metric: {metric_key}', fontweight='bold')
        axes[1].legend()

        # The summary lists the layers that were actually added above.
        arch_txt = f'Input({X_train.shape[1]}) → ' + ' → '.join(layer_descs) + ' → Output'
        plt.suptitle(f'Architecture: {architecture} | Final Score: {final_metric:.4f}', fontweight='bold', color='#1e3a5f')
        plt.tight_layout()

        buf = io.BytesIO()
        plt.savefig(buf, format='png', dpi=120, bbox_inches='tight')
        plt.close('all')
        chart = base64.b64encode(buf.getvalue()).decode()

        total_params = int(model.count_params())
        return {
            'success': True,
            'task': task,
            'target': target,
            'architecture': architecture,
            'layers_used': layers,
            'total_params': total_params,
            'epochs_ran': len(history.history['loss']),
            'final_score': round(final_metric, 4),
            'train_loss': round(float(history.history['loss'][-1]), 4),
            'val_loss': round(float(history.history['val_loss'][-1]), 4),
            'architecture_summary': arch_txt,
            'optimizer': optimizer,
            'dropout': dropout,
            'chart': chart,
            'message': f'Custom DL done! {architecture} architecture | Score: {final_metric:.4f} | Params: {total_params:,}'
        }
    except Exception as e:
        import traceback
        # Release any figure created before the failure.
        plt.close('all')
        return JSONResponse({'error': str(e), 'trace': traceback.format_exc()[-500:]}, status_code=500)
632
+
633
+
634
+ # ── Computer Vision (HF Space) ──────────────────────────────────
@app.post("/cv/classify")
async def hf_cv_classify(request: Request):
    """Compute simple colour statistics for an image and bucket its exposure.

    Request JSON:
        image: base64-encoded image, optionally as a data URL (required).

    The image is converted to RGB; per-channel mean and standard deviation,
    overall brightness and contrast are computed on the 0-255 pixel values.
    The exposure category comes from fixed brightness thresholds and the
    dominant colour is the channel with the highest mean.

    Returns:
        A ``JSONResponse`` with the features, category, dominant colour and
        image size, or an ``error`` field on failure.
    """
    try:
        payload = await request.json()
        img_b64 = payload.get('image', '')

        if not img_b64:
            return JSONResponse({'error': 'Image required'}, status_code=400)

        from PIL import Image

        # Strip a "data:<mime>;base64," header if the client sent a data URL.
        if ',' in img_b64:
            img_b64 = img_b64.split(',', 1)[1]
        img = Image.open(io.BytesIO(base64.b64decode(img_b64))).convert('RGB')
        img_arr = np.array(img)

        # Feature extraction
        red, green, blue = img_arr[:, :, 0], img_arr[:, :, 1], img_arr[:, :, 2]
        features = {
            'mean_r': float(red.mean()),
            'mean_g': float(green.mean()),
            'mean_b': float(blue.mean()),
            'std_r': float(red.std()),
            'std_g': float(green.std()),
            'std_b': float(blue.std()),
            'brightness': float(img_arr.mean()),
            'contrast': float(img_arr.std()),
            'width': img.width,
            'height': img.height,
        }

        # Simple brightness-based classification
        brightness = features['brightness']
        if brightness > 200:
            category = 'Very Bright / Overexposed'
        elif brightness > 150:
            category = 'Bright / Well-lit'
        elif brightness > 100:
            category = 'Normal Exposure'
        elif brightness > 50:
            category = 'Dark / Low-light'
        else:
            category = 'Very Dark / Underexposed'

        if features['mean_r'] > max(features['mean_g'], features['mean_b']):
            dominant = 'Red'
        elif features['mean_g'] > features['mean_b']:
            dominant = 'Green'
        else:
            dominant = 'Blue'

        return JSONResponse({
            'success': True,
            'features': {k: round(v, 2) for k, v in features.items()},
            'category': category,
            'dominant_color': dominant,
            # The separator was a mis-encoded multiplication sign in the source text.
            'size': f"{img.width}×{img.height}",
            'message': f'CV analysis: {category}, dominant: {dominant}'
        })
    except Exception as e:
        return JSONResponse({'error': str(e)}, status_code=500)
687
+
688
+
@app.post("/quantum/simulate")
async def hf_quantum_sim(request: Request):
    """Simulate a sequence of single-qubit gates on a small register.

    Request JSON:
        n_qubits: register size, clamped to 1..4 (default 2).
        circuit: sequence of gate names among I, X, H, Z (default
            ``['H', 'X']``); unknown names are skipped.
        shots: number of simulated measurements, clamped to 0..1000
            (default 100).

    Every gate is applied as ``gate ⊗ I ⊗ ... ⊗ I``, i.e. always to the
    first tensor factor; the remaining qubits stay in |0⟩.

    Returns:
        A ``JSONResponse`` with the final basis-state probabilities, the
        sampled measurement counts, the most frequent outcome and a bar
        chart (base64 PNG), or an ``error`` field on failure.
    """
    try:
        payload = await request.json()
        # Clamp to at least one qubit: zero or negative sizes make the
        # state-vector dimension invalid.
        n_qubits = max(1, min(int(payload.get('n_qubits', 2)), 4))
        circuit = payload.get('circuit', ['H', 'X'])
        # A negative sample count would make np.random.choice raise.
        n_shots = max(0, min(int(payload.get('shots', 100)), 1000))

        I = np.eye(2, dtype=complex)
        X = np.array([[0, 1], [1, 0]], dtype=complex)
        H = np.array([[1, 1], [1, -1]], dtype=complex) / np.sqrt(2)
        Z = np.array([[1, 0], [0, -1]], dtype=complex)
        gates_map = {'I': I, 'X': X, 'H': H, 'Z': Z}

        # Lift each gate to the full register once instead of per use.
        lifted = {}
        for name, gate in gates_map.items():
            full = gate
            for _ in range(n_qubits - 1):
                full = np.kron(full, I)
            lifted[name] = full

        state = np.zeros(2 ** n_qubits, dtype=complex)
        state[0] = 1.0

        # Gate names may arrive as non-strings; normalise before the lookup.
        gate_labels = [str(g) for g in circuit]
        for label in gate_labels:
            full = lifted.get(label.upper())
            if full is None:
                continue
            state = full @ state
            state /= np.linalg.norm(state)

        probs = np.abs(state) ** 2
        basis = [format(i, f'0{n_qubits}b') for i in range(2 ** n_qubits)]
        measurements = np.random.choice(len(basis), size=n_shots, p=probs / probs.sum())
        counts = {}
        for m in measurements:
            k = '|' + basis[m] + '⟩'
            counts[k] = counts.get(k, 0) + 1

        # Plot
        fig, ax = plt.subplots(figsize=(6, 3))
        ax.bar(basis, probs, color='#5b5ef4')
        ax.set_title(f'Quantum Circuit: {" → ".join(gate_labels)}', fontweight='bold')
        ax.set_xlabel('Basis State')
        ax.set_ylabel('Probability')
        plt.tight_layout()
        buf = io.BytesIO()
        plt.savefig(buf, format='png', dpi=80)
        plt.close('all')
        chart = base64.b64encode(buf.getvalue()).decode()

        return JSONResponse({
            'success': True,
            'n_qubits': n_qubits,
            'circuit': circuit,
            'final_state': {f'|{b}⟩': round(float(p), 4) for b, p in zip(basis, probs)},
            'measurements': counts,
            'most_likely': max(counts, key=counts.get) if counts else '|0⟩',
            'chart': chart,
            'message': f'Quantum circuit done! {n_qubits} qubits'
        })
    except Exception as e:
        # Release any figure created before the failure.
        plt.close('all')
        return JSONResponse({'error': str(e)}, status_code=500)
746
+