abhinav337463 committed on
Commit
4c3b070
·
verified ·
1 Parent(s): 09d898e

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +39 -9
main.py CHANGED
@@ -63,7 +63,11 @@ class LogicFrontierEngine:
63
  "test_spam": 0,
64
  "low_density": 0,
65
  "fuzzy": 0
66
- }
 
 
 
 
67
  }
68
  self.is_running = False
69
  self.lock = threading.Lock()
@@ -135,6 +139,8 @@ class LogicFrontierEngine:
135
  # --- TURBO LOAD: DIRECT TARGET LANGUAGES ---
136
  for folder in Config.TARGET_FOLDERS:
137
  try:
 
 
138
  print(f"FRONTIER: Targeting folder {folder}...")
139
  dataset = load_dataset(Config.SOURCE_DATASET, data_dir=folder, split="train", streaming=True)
140
 
@@ -144,6 +150,12 @@ class LogicFrontierEngine:
144
  text = row.get('content', '')
145
  lang = row.get('language', 'unknown')
146
 
 
 
 
 
 
 
147
  if lang not in Config.ALLOWED_LANGS:
148
  self.stats["rejects"]["wrong_lang"] += 1
149
  continue
@@ -178,8 +190,12 @@ class LogicFrontierEngine:
178
 
179
  if self.stats["gold_files"] % 250 == 0: conn.commit()
180
 
181
- except: continue
182
- except: continue
 
 
 
 
183
 
184
  f_vault.close(); self.stats["status"] = "Completed"; self.is_running = False
185
 
@@ -191,6 +207,7 @@ class LogicFrontierEngine:
191
  shutil.copyfileobj(f_in, f_out)
192
  self.api.upload_file(path_or_fileobj=gz, path_in_repo=f"{Config.TARGET_DIR}/{gz}", repo_id=Config.TARGET_REPO, repo_type="dataset")
193
  self.stats["shards_pushed"] += 1
 
194
  self.last_upload_time = time.time()
195
  os.remove(gz)
196
 
@@ -200,21 +217,34 @@ app = FastAPI(); engine = LogicFrontierEngine()
200
  @app.get("/health")
201
  def health():
202
  uptime = time.time() - engine.stats["start_time"]
 
 
 
 
203
  return {
204
- "engine": "V56 LOGIC-FRONTIER-MAX",
205
  "status": engine.stats["status"],
206
- "uptime_sec": round(uptime, 2),
 
207
  "performance": {
208
  "processed_total": engine.stats["processed_total"],
209
  "gold_files": engine.stats["gold_files"],
210
  "success_rate": f"{(engine.stats['gold_files']/max(1, engine.stats['processed_total'])*100):.2f}%",
211
- "tokens_est": f"{engine.stats['total_tokens']/1e6:.2f}M",
212
- "speed_files_per_sec": round(engine.stats["processed_total"] / max(1, uptime), 2)
 
 
 
213
  },
214
  "data_insights": {
215
  "top_languages": dict(engine.stats["lang_distribution"]),
216
  "vault_mb": engine.stats["vault_mb"],
217
- "shards_uploaded": engine.stats["shards_pushed"]
 
 
 
 
 
218
  },
219
  "reject_analysis": engine.stats["rejects"]
220
  }
@@ -224,7 +254,7 @@ def ping(bt: BackgroundTasks):
224
  if not engine.is_running:
225
  bt.add_task(engine.start_streaming)
226
  return {"msg": "Logic Frontier Online. Turbo Mode Enabled."}
227
- return {"msg": "In progress."}
228
 
229
  if __name__ == "__main__":
230
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
63
  "test_spam": 0,
64
  "low_density": 0,
65
  "fuzzy": 0
66
+ },
67
+ # --- NEW DATA ADDED ---
68
+ "current_folder": "None",
69
+ "last_shard_time": 0,
70
+ "session_errors": 0
71
  }
72
  self.is_running = False
73
  self.lock = threading.Lock()
 
139
  # --- TURBO LOAD: DIRECT TARGET LANGUAGES ---
140
  for folder in Config.TARGET_FOLDERS:
141
  try:
142
+ # NEW CODE: Status update
143
+ self.stats["current_folder"] = folder
144
  print(f"FRONTIER: Targeting folder {folder}...")
145
  dataset = load_dataset(Config.SOURCE_DATASET, data_dir=folder, split="train", streaming=True)
146
 
 
150
  text = row.get('content', '')
151
  lang = row.get('language', 'unknown')
152
 
153
+ # --- NEW PATCH: OVERRIDE UNKNOWN LANG WITHOUT DELETING OLD LINE ---
154
+ if lang == 'unknown':
155
+ # Logic: Map folder names to ALLOWED_LANGS keys
156
+ mapping = {"python": "Python", "cpp": "C++", "java": "Java", "javascript": "JavaScript", "typescript": "TypeScript", "go": "Go", "rust": "Rust"}
157
+ lang = mapping.get(folder, lang)
158
+
159
  if lang not in Config.ALLOWED_LANGS:
160
  self.stats["rejects"]["wrong_lang"] += 1
161
  continue
 
190
 
191
  if self.stats["gold_files"] % 250 == 0: conn.commit()
192
 
193
+ except:
194
+ self.stats["session_errors"] += 1
195
+ continue
196
+ except:
197
+ self.stats["session_errors"] += 1
198
+ continue
199
 
200
  f_vault.close(); self.stats["status"] = "Completed"; self.is_running = False
201
 
 
207
  shutil.copyfileobj(f_in, f_out)
208
  self.api.upload_file(path_or_fileobj=gz, path_in_repo=f"{Config.TARGET_DIR}/{gz}", repo_id=Config.TARGET_REPO, repo_type="dataset")
209
  self.stats["shards_pushed"] += 1
210
+ self.stats["last_shard_time"] = time.time()
211
  self.last_upload_time = time.time()
212
  os.remove(gz)
213
 
 
217
  @app.get("/health")
218
  def health():
219
  uptime = time.time() - engine.stats["start_time"]
220
+ # NEW: Enhanced Calculations
221
+ token_val = engine.stats['total_tokens']
222
+ files_per_min = (engine.stats["processed_total"] / max(1, uptime)) * 60
223
+
224
  return {
225
+ "engine": "V57 LOGIC-FRONTIER-ELITE",
226
  "status": engine.stats["status"],
227
+ "active_folder": engine.stats["current_folder"],
228
+ "uptime_formatted": f"{int(uptime//3600)}h {int((uptime%3600)//60)}m",
229
  "performance": {
230
  "processed_total": engine.stats["processed_total"],
231
  "gold_files": engine.stats["gold_files"],
232
  "success_rate": f"{(engine.stats['gold_files']/max(1, engine.stats['processed_total'])*100):.2f}%",
233
+ "tokens_est": f"{token_val/1e6:.2f}M",
234
+ "speed_metrics": {
235
+ "files_per_sec": round(engine.stats["processed_total"] / max(1, uptime), 2),
236
+ "files_per_min": round(files_per_min, 2)
237
+ }
238
  },
239
  "data_insights": {
240
  "top_languages": dict(engine.stats["lang_distribution"]),
241
  "vault_mb": engine.stats["vault_mb"],
242
+ "shards_uploaded": engine.stats["shards_pushed"],
243
+ "last_shard_pushed": time.ctime(engine.stats["last_shard_time"]) if engine.stats["last_shard_time"] > 0 else "None"
244
+ },
245
+ "system_health": {
246
+ "internal_errors": engine.stats["session_errors"],
247
+ "db_path": Config.INDEX_DB
248
  },
249
  "reject_analysis": engine.stats["rejects"]
250
  }
 
254
  if not engine.is_running:
255
  bt.add_task(engine.start_streaming)
256
  return {"msg": "Logic Frontier Online. Turbo Mode Enabled."}
257
+ return {"msg": "Running."}
258
 
259
  if __name__ == "__main__":
260
  uvicorn.run(app, host="0.0.0.0", port=7860)