Spaces:
Paused
Paused
Update main.py
Browse files
main.py
CHANGED
|
@@ -63,7 +63,11 @@ class LogicFrontierEngine:
|
|
| 63 |
"test_spam": 0,
|
| 64 |
"low_density": 0,
|
| 65 |
"fuzzy": 0
|
| 66 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
}
|
| 68 |
self.is_running = False
|
| 69 |
self.lock = threading.Lock()
|
|
@@ -135,6 +139,8 @@ class LogicFrontierEngine:
|
|
| 135 |
# --- TURBO LOAD: DIRECT TARGET LANGUAGES ---
|
| 136 |
for folder in Config.TARGET_FOLDERS:
|
| 137 |
try:
|
|
|
|
|
|
|
| 138 |
print(f"FRONTIER: Targeting folder {folder}...")
|
| 139 |
dataset = load_dataset(Config.SOURCE_DATASET, data_dir=folder, split="train", streaming=True)
|
| 140 |
|
|
@@ -144,6 +150,12 @@ class LogicFrontierEngine:
|
|
| 144 |
text = row.get('content', '')
|
| 145 |
lang = row.get('language', 'unknown')
|
| 146 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 147 |
if lang not in Config.ALLOWED_LANGS:
|
| 148 |
self.stats["rejects"]["wrong_lang"] += 1
|
| 149 |
continue
|
|
@@ -178,8 +190,12 @@ class LogicFrontierEngine:
|
|
| 178 |
|
| 179 |
if self.stats["gold_files"] % 250 == 0: conn.commit()
|
| 180 |
|
| 181 |
-
except:
|
| 182 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 183 |
|
| 184 |
f_vault.close(); self.stats["status"] = "Completed"; self.is_running = False
|
| 185 |
|
|
@@ -191,6 +207,7 @@ class LogicFrontierEngine:
|
|
| 191 |
shutil.copyfileobj(f_in, f_out)
|
| 192 |
self.api.upload_file(path_or_fileobj=gz, path_in_repo=f"{Config.TARGET_DIR}/{gz}", repo_id=Config.TARGET_REPO, repo_type="dataset")
|
| 193 |
self.stats["shards_pushed"] += 1
|
|
|
|
| 194 |
self.last_upload_time = time.time()
|
| 195 |
os.remove(gz)
|
| 196 |
|
|
@@ -200,21 +217,34 @@ app = FastAPI(); engine = LogicFrontierEngine()
|
|
| 200 |
@app.get("/health")
|
| 201 |
def health():
|
| 202 |
uptime = time.time() - engine.stats["start_time"]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 203 |
return {
|
| 204 |
-
"engine": "
|
| 205 |
"status": engine.stats["status"],
|
| 206 |
-
"
|
|
|
|
| 207 |
"performance": {
|
| 208 |
"processed_total": engine.stats["processed_total"],
|
| 209 |
"gold_files": engine.stats["gold_files"],
|
| 210 |
"success_rate": f"{(engine.stats['gold_files']/max(1, engine.stats['processed_total'])*100):.2f}%",
|
| 211 |
-
"tokens_est": f"{
|
| 212 |
-
"
|
|
|
|
|
|
|
|
|
|
| 213 |
},
|
| 214 |
"data_insights": {
|
| 215 |
"top_languages": dict(engine.stats["lang_distribution"]),
|
| 216 |
"vault_mb": engine.stats["vault_mb"],
|
| 217 |
-
"shards_uploaded": engine.stats["shards_pushed"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 218 |
},
|
| 219 |
"reject_analysis": engine.stats["rejects"]
|
| 220 |
}
|
|
@@ -224,7 +254,7 @@ def ping(bt: BackgroundTasks):
|
|
| 224 |
if not engine.is_running:
|
| 225 |
bt.add_task(engine.start_streaming)
|
| 226 |
return {"msg": "Logic Frontier Online. Turbo Mode Enabled."}
|
| 227 |
-
return {"msg": "
|
| 228 |
|
| 229 |
if __name__ == "__main__":
|
| 230 |
uvicorn.run(app, host="0.0.0.0", port=7860)
|
|
|
|
| 63 |
"test_spam": 0,
|
| 64 |
"low_density": 0,
|
| 65 |
"fuzzy": 0
|
| 66 |
+
},
|
| 67 |
+
# --- NEW DATA ADDED ---
|
| 68 |
+
"current_folder": "None",
|
| 69 |
+
"last_shard_time": 0,
|
| 70 |
+
"session_errors": 0
|
| 71 |
}
|
| 72 |
self.is_running = False
|
| 73 |
self.lock = threading.Lock()
|
|
|
|
| 139 |
# --- TURBO LOAD: DIRECT TARGET LANGUAGES ---
|
| 140 |
for folder in Config.TARGET_FOLDERS:
|
| 141 |
try:
|
| 142 |
+
# NEW CODE: Status update
|
| 143 |
+
self.stats["current_folder"] = folder
|
| 144 |
print(f"FRONTIER: Targeting folder {folder}...")
|
| 145 |
dataset = load_dataset(Config.SOURCE_DATASET, data_dir=folder, split="train", streaming=True)
|
| 146 |
|
|
|
|
| 150 |
text = row.get('content', '')
|
| 151 |
lang = row.get('language', 'unknown')
|
| 152 |
|
| 153 |
+
# --- NEW PATCH: OVERRIDE UNKNOWN LANG WITHOUT DELETING OLD LINE ---
|
| 154 |
+
if lang == 'unknown':
|
| 155 |
+
# Logic: Map folder names to ALLOWED_LANGS keys
|
| 156 |
+
mapping = {"python": "Python", "cpp": "C++", "java": "Java", "javascript": "JavaScript", "typescript": "TypeScript", "go": "Go", "rust": "Rust"}
|
| 157 |
+
lang = mapping.get(folder, lang)
|
| 158 |
+
|
| 159 |
if lang not in Config.ALLOWED_LANGS:
|
| 160 |
self.stats["rejects"]["wrong_lang"] += 1
|
| 161 |
continue
|
|
|
|
| 190 |
|
| 191 |
if self.stats["gold_files"] % 250 == 0: conn.commit()
|
| 192 |
|
| 193 |
+
except:
|
| 194 |
+
self.stats["session_errors"] += 1
|
| 195 |
+
continue
|
| 196 |
+
except:
|
| 197 |
+
self.stats["session_errors"] += 1
|
| 198 |
+
continue
|
| 199 |
|
| 200 |
f_vault.close(); self.stats["status"] = "Completed"; self.is_running = False
|
| 201 |
|
|
|
|
| 207 |
shutil.copyfileobj(f_in, f_out)
|
| 208 |
self.api.upload_file(path_or_fileobj=gz, path_in_repo=f"{Config.TARGET_DIR}/{gz}", repo_id=Config.TARGET_REPO, repo_type="dataset")
|
| 209 |
self.stats["shards_pushed"] += 1
|
| 210 |
+
self.stats["last_shard_time"] = time.time()
|
| 211 |
self.last_upload_time = time.time()
|
| 212 |
os.remove(gz)
|
| 213 |
|
|
|
|
| 217 |
@app.get("/health")
def health():
    """Report the engine's current counters as a nested dict.

    Served at ``GET /health``. All values are read from ``engine.stats``
    (plus ``Config.INDEX_DB``); nothing is modified here.

    Returns:
        dict: top-level keys ``engine``, ``status``, ``active_folder``,
        ``uptime_formatted``, ``performance``, ``data_insights``,
        ``system_health`` and ``reject_analysis``.
    """
    stats = engine.stats
    uptime = time.time() - stats["start_time"]

    # Throughput: the denominator is clamped to at least 1 so that a
    # zero or sub-second uptime cannot blow up the rate.
    token_val = stats['total_tokens']
    files_per_min = (stats["processed_total"] / max(1, uptime)) * 60

    # Break the uptime into whole hours and whole minutes.
    hours, leftover = divmod(uptime, 3600)
    minutes = leftover // 60

    # A timestamp of 0 (or less) is rendered as the literal string "None".
    shard_ts = stats["last_shard_time"]
    if shard_ts > 0:
        last_shard = time.ctime(shard_ts)
    else:
        last_shard = "None"

    performance = {
        "processed_total": stats["processed_total"],
        "gold_files": stats["gold_files"],
        # max(1, ...) keeps the ratio defined before anything is processed.
        "success_rate": f"{(stats['gold_files']/max(1, stats['processed_total'])*100):.2f}%",
        "tokens_est": f"{token_val/1e6:.2f}M",
        "speed_metrics": {
            "files_per_sec": round(stats["processed_total"] / max(1, uptime), 2),
            "files_per_min": round(files_per_min, 2),
        },
    }

    data_insights = {
        # dict(...) hands back a plain-dict copy of the distribution.
        "top_languages": dict(stats["lang_distribution"]),
        "vault_mb": stats["vault_mb"],
        "shards_uploaded": stats["shards_pushed"],
        "last_shard_pushed": last_shard,
    }

    system_health = {
        "internal_errors": stats["session_errors"],
        "db_path": Config.INDEX_DB,
    }

    return {
        "engine": "V57 LOGIC-FRONTIER-ELITE",
        "status": stats["status"],
        "active_folder": stats["current_folder"],
        "uptime_formatted": f"{int(hours)}h {int(minutes)}m",
        "performance": performance,
        "data_insights": data_insights,
        "system_health": system_health,
        "reject_analysis": stats["rejects"],
    }
|
|
|
|
| 254 |
if not engine.is_running:
|
| 255 |
bt.add_task(engine.start_streaming)
|
| 256 |
return {"msg": "Logic Frontier Online. Turbo Mode Enabled."}
|
| 257 |
+
return {"msg": "Running."}
|
| 258 |
|
| 259 |
if __name__ == "__main__":
|
| 260 |
uvicorn.run(app, host="0.0.0.0", port=7860)
|