Spaces:
Sleeping
Sleeping
Sahil committed on
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
|
|
| 1 |
import os
|
| 2 |
import json
|
| 3 |
import time
|
| 4 |
import threading
|
|
|
|
| 5 |
from datetime import datetime
|
| 6 |
from flask import Flask, request, jsonify, send_from_directory
|
| 7 |
from flask_cors import CORS
|
|
@@ -24,10 +26,36 @@ LEVEL_1_MAX = 20 # Max entries before archiving to Level 2
|
|
| 24 |
LEVEL_2_MAX = 50 # Max entries before archiving to Level 3
|
| 25 |
AUTO_ARCHIVE_ENABLED = True
|
| 26 |
AUTO_ARCHIVE_INTERVAL = 600 # 10 minutes
|
|
|
|
|
|
|
| 27 |
|
| 28 |
# Memory buffer
|
| 29 |
MEMORY_BUFFER = []
|
| 30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
def load_training_dataset():
|
| 32 |
"""Load existing training data from HuggingFace"""
|
| 33 |
try:
|
|
@@ -82,7 +110,6 @@ def call_openai_gpt4o_mini(prompt):
|
|
| 82 |
def summarize_conversation(query, response):
|
| 83 |
"""Create a summary of the conversation using AI"""
|
| 84 |
if not client:
|
| 85 |
-
# Fallback summary without AI
|
| 86 |
return f"{query[:50]}... -> {response[:50]}..."
|
| 87 |
|
| 88 |
try:
|
|
@@ -98,7 +125,7 @@ def create_super_summary(summaries):
|
|
| 98 |
return "General knowledge compilation"
|
| 99 |
|
| 100 |
try:
|
| 101 |
-
combined = "\n".join(summaries[:10])
|
| 102 |
prompt = f"Create a brief summary of these conversation topics:\n{combined}"
|
| 103 |
super_summary = call_openai_gpt4o_mini(prompt)
|
| 104 |
return super_summary if super_summary else "General knowledge compilation"
|
|
@@ -107,8 +134,6 @@ def create_super_summary(summaries):
|
|
| 107 |
|
| 108 |
def archive_level_1_to_level_2():
|
| 109 |
"""Archive Level 1 entries to Level 2 with compression"""
|
| 110 |
-
global MEMORY_BUFFER
|
| 111 |
-
|
| 112 |
dataset = load_training_dataset()
|
| 113 |
level_1_entries = [d for d in dataset if d.get("level") == 1 and not d.get("archived")]
|
| 114 |
|
|
@@ -131,7 +156,6 @@ def archive_level_1_to_level_2():
|
|
| 131 |
}
|
| 132 |
archived_entries.append(archived_entry)
|
| 133 |
|
| 134 |
-
# Update dataset
|
| 135 |
updated_dataset = [d for d in dataset if d.get("level") != 1 or d.get("archived")]
|
| 136 |
updated_dataset.extend(archived_entries)
|
| 137 |
|
|
@@ -164,7 +188,6 @@ def archive_level_2_to_level_3():
|
|
| 164 |
"entries_compressed": len(level_2_entries)
|
| 165 |
}
|
| 166 |
|
| 167 |
-
# Keep only Level 1, Level 3, and recent Level 2 entries
|
| 168 |
updated_dataset = [d for d in dataset if d.get("level") != 2]
|
| 169 |
updated_dataset.append(level_3_entry)
|
| 170 |
|
|
@@ -173,6 +196,55 @@ def archive_level_2_to_level_3():
|
|
| 173 |
dataset_obj.push_to_hub(TRAINING_DATASET, token=HF_TOKEN, private=False)
|
| 174 |
print(f"✅ Created Level 3 super-summary")
|
| 175 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
def auto_archive_worker():
|
| 177 |
"""Background worker that automatically archives memory levels"""
|
| 178 |
print("ποΈ Auto-archive worker started")
|
|
@@ -197,9 +269,7 @@ def index():
|
|
| 197 |
|
| 198 |
@app.route("/api/chat", methods=["POST"])
|
| 199 |
def chat():
|
| 200 |
-
"""Handle chat requests with
|
| 201 |
-
global MEMORY_BUFFER
|
| 202 |
-
|
| 203 |
data = request.get_json()
|
| 204 |
query = data.get("query", "").strip()
|
| 205 |
|
|
@@ -227,22 +297,25 @@ def chat():
|
|
| 227 |
"archived": False,
|
| 228 |
"level": 1,
|
| 229 |
"counter": 1,
|
| 230 |
-
"timestamp": datetime.now().isoformat()
|
|
|
|
| 231 |
}
|
| 232 |
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
|
|
|
| 239 |
|
| 240 |
return jsonify({
|
| 241 |
"success": True,
|
| 242 |
"response": response,
|
| 243 |
"summary": summary,
|
| 244 |
"level": 1,
|
| 245 |
-
"buffered": len(MEMORY_BUFFER)
|
|
|
|
| 246 |
})
|
| 247 |
|
| 248 |
@app.route("/api/dataset-stats", methods=["GET"])
|
|
@@ -254,6 +327,7 @@ def dataset_stats():
|
|
| 254 |
level_1 = [d for d in training_data if d.get("level") == 1]
|
| 255 |
level_2 = [d for d in training_data if d.get("level") == 2]
|
| 256 |
level_3 = [d for d in training_data if d.get("level") == 3]
|
|
|
|
| 257 |
|
| 258 |
return jsonify({
|
| 259 |
"success": True,
|
|
@@ -262,6 +336,7 @@ def dataset_stats():
|
|
| 262 |
"level_2_archived": len(level_2),
|
| 263 |
"level_3_super": len(level_3),
|
| 264 |
"buffered": len(MEMORY_BUFFER),
|
|
|
|
| 265 |
"dataset_url": f"https://huggingface.co/datasets/{TRAINING_DATASET}"
|
| 266 |
})
|
| 267 |
except Exception as e:
|
|
@@ -298,7 +373,7 @@ def archive_now():
|
|
| 298 |
return jsonify({"error": str(e)}), 500
|
| 299 |
|
| 300 |
if __name__ == "__main__":
|
| 301 |
-
port = int(os.getenv("PORT",
|
| 302 |
|
| 303 |
print("π Starting ContinuumGPT Hierarchical Memory System...")
|
| 304 |
print(f"π Training Dataset: {TRAINING_DATASET}")
|
|
@@ -309,14 +384,22 @@ if __name__ == "__main__":
|
|
| 309 |
print(f" Level 2 (Archived): Compressed summaries (max {LEVEL_2_MAX} before archiving)")
|
| 310 |
print(f" Level 3 (Super): Global knowledge compilation")
|
| 311 |
print(f" Auto-archiving: Every {AUTO_ARCHIVE_INTERVAL} seconds")
|
|
|
|
|
|
|
| 312 |
print("")
|
| 313 |
|
| 314 |
if OPENAI_API_KEY:
|
| 315 |
print("✅ OpenAI API Key Configured")
|
|
|
|
| 316 |
# Start auto-archive worker
|
| 317 |
archive_thread = threading.Thread(target=auto_archive_worker, daemon=True)
|
| 318 |
archive_thread.start()
|
| 319 |
print("✅ Auto-archive worker started")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 320 |
else:
|
| 321 |
print("⚠️ OpenAI API Key Missing - Add OPENAI_API_KEY to enable")
|
| 322 |
|
|
@@ -328,8 +411,11 @@ if __name__ == "__main__":
|
|
| 328 |
level = d.get("level", 1)
|
| 329 |
level_counts[level] = level_counts.get(level, 0) + 1
|
| 330 |
print(f"π Current dataset: L1={level_counts[1]}, L2={level_counts[2]}, L3={level_counts[3]}")
|
|
|
|
|
|
|
|
|
|
| 331 |
else:
|
| 332 |
print("⚠️ HuggingFace Integration Disabled - Add HF_TOKEN to enable")
|
| 333 |
|
| 334 |
print("")
|
| 335 |
-
app.run(host="0.0.0.0", port=port, debug=False, threaded=True)
|
|
|
|
| 1 |
+
|
| 2 |
import os
|
| 3 |
import json
|
| 4 |
import time
|
| 5 |
import threading
|
| 6 |
+
import random
|
| 7 |
from datetime import datetime
|
| 8 |
from flask import Flask, request, jsonify, send_from_directory
|
| 9 |
from flask_cors import CORS
|
|
|
|
| 26 |
LEVEL_2_MAX = 50 # Max entries before archiving to Level 3
|
| 27 |
AUTO_ARCHIVE_ENABLED = True
|
| 28 |
AUTO_ARCHIVE_INTERVAL = 600 # 10 minutes
|
| 29 |
+
AUTO_TRAINING_INTERVAL = 300 # 5 minutes - auto-generate training examples
|
| 30 |
+
AUTO_SAVE_ENABLED = True # Always save responses immediately
|
| 31 |
|
| 32 |
# Memory buffer
|
| 33 |
MEMORY_BUFFER = []
|
| 34 |
|
| 35 |
+
# Auto-training prompts for diverse learning
|
| 36 |
+
AUTO_TRAINING_PROMPTS = [
|
| 37 |
+
"Explain quantum computing in simple terms",
|
| 38 |
+
"How does machine learning work?",
|
| 39 |
+
"What are the best practices for web development?",
|
| 40 |
+
"Write a Python function to sort a list",
|
| 41 |
+
"Explain the difference between AI and ML",
|
| 42 |
+
"How do neural networks learn?",
|
| 43 |
+
"What is the future of artificial intelligence?",
|
| 44 |
+
"Create a creative story about space exploration",
|
| 45 |
+
"Explain blockchain technology",
|
| 46 |
+
"How does natural language processing work?",
|
| 47 |
+
"What are design patterns in software engineering?",
|
| 48 |
+
"Explain the concept of recursion with examples",
|
| 49 |
+
"How do databases manage concurrent transactions?",
|
| 50 |
+
"What is the difference between REST and GraphQL?",
|
| 51 |
+
"Explain cloud computing and its benefits",
|
| 52 |
+
"How does encryption work?",
|
| 53 |
+
"What are the principles of good UI/UX design?",
|
| 54 |
+
"Explain the concept of Big O notation",
|
| 55 |
+
"How do search engines rank websites?",
|
| 56 |
+
"What is containerization and why is it useful?"
|
| 57 |
+
]
|
| 58 |
+
|
| 59 |
def load_training_dataset():
|
| 60 |
"""Load existing training data from HuggingFace"""
|
| 61 |
try:
|
|
|
|
| 110 |
def summarize_conversation(query, response):
|
| 111 |
"""Create a summary of the conversation using AI"""
|
| 112 |
if not client:
|
|
|
|
| 113 |
return f"{query[:50]}... -> {response[:50]}..."
|
| 114 |
|
| 115 |
try:
|
|
|
|
| 125 |
return "General knowledge compilation"
|
| 126 |
|
| 127 |
try:
|
| 128 |
+
combined = "\n".join(summaries[:10])
|
| 129 |
prompt = f"Create a brief summary of these conversation topics:\n{combined}"
|
| 130 |
super_summary = call_openai_gpt4o_mini(prompt)
|
| 131 |
return super_summary if super_summary else "General knowledge compilation"
|
|
|
|
| 134 |
|
| 135 |
def archive_level_1_to_level_2():
|
| 136 |
"""Archive Level 1 entries to Level 2 with compression"""
|
|
|
|
|
|
|
| 137 |
dataset = load_training_dataset()
|
| 138 |
level_1_entries = [d for d in dataset if d.get("level") == 1 and not d.get("archived")]
|
| 139 |
|
|
|
|
| 156 |
}
|
| 157 |
archived_entries.append(archived_entry)
|
| 158 |
|
|
|
|
| 159 |
updated_dataset = [d for d in dataset if d.get("level") != 1 or d.get("archived")]
|
| 160 |
updated_dataset.extend(archived_entries)
|
| 161 |
|
|
|
|
| 188 |
"entries_compressed": len(level_2_entries)
|
| 189 |
}
|
| 190 |
|
|
|
|
| 191 |
updated_dataset = [d for d in dataset if d.get("level") != 2]
|
| 192 |
updated_dataset.append(level_3_entry)
|
| 193 |
|
|
|
|
| 196 |
dataset_obj.push_to_hub(TRAINING_DATASET, token=HF_TOKEN, private=False)
|
| 197 |
print(f"✅ Created Level 3 super-summary")
|
| 198 |
|
| 199 |
+
def auto_training_worker():
|
| 200 |
+
"""Background worker that automatically generates training examples"""
|
| 201 |
+
print("π€ Auto-training worker started")
|
| 202 |
+
training_count = 0
|
| 203 |
+
|
| 204 |
+
while True:
|
| 205 |
+
try:
|
| 206 |
+
if not HF_TOKEN or not client:
|
| 207 |
+
print("⚠️ Auto-training paused - need HF_TOKEN and OPENAI_API_KEY")
|
| 208 |
+
time.sleep(AUTO_TRAINING_INTERVAL)
|
| 209 |
+
continue
|
| 210 |
+
|
| 211 |
+
# Select random training prompt
|
| 212 |
+
prompt = random.choice(AUTO_TRAINING_PROMPTS)
|
| 213 |
+
print(f"\nπ Auto-training #{training_count + 1}: '{prompt[:50]}...'")
|
| 214 |
+
|
| 215 |
+
# Generate response
|
| 216 |
+
response = call_openai_gpt4o_mini(prompt)
|
| 217 |
+
if not response:
|
| 218 |
+
print("β Failed to generate response")
|
| 219 |
+
time.sleep(AUTO_TRAINING_INTERVAL)
|
| 220 |
+
continue
|
| 221 |
+
|
| 222 |
+
# Create summary
|
| 223 |
+
summary = summarize_conversation(prompt, response)
|
| 224 |
+
|
| 225 |
+
# Create Level 1 memory entry
|
| 226 |
+
memory_entry = {
|
| 227 |
+
"query": prompt,
|
| 228 |
+
"response": response,
|
| 229 |
+
"summary": summary,
|
| 230 |
+
"archived": False,
|
| 231 |
+
"level": 1,
|
| 232 |
+
"counter": 1,
|
| 233 |
+
"timestamp": datetime.now().isoformat(),
|
| 234 |
+
"auto_generated": True
|
| 235 |
+
}
|
| 236 |
+
|
| 237 |
+
# Save immediately to dataset
|
| 238 |
+
if save_to_training_dataset([memory_entry]):
|
| 239 |
+
training_count += 1
|
| 240 |
+
print(f"✅ Auto-trained and saved! Total auto-training: {training_count}")
|
| 241 |
+
|
| 242 |
+
time.sleep(AUTO_TRAINING_INTERVAL)
|
| 243 |
+
|
| 244 |
+
except Exception as e:
|
| 245 |
+
print(f"β Auto-training error: {e}")
|
| 246 |
+
time.sleep(AUTO_TRAINING_INTERVAL)
|
| 247 |
+
|
| 248 |
def auto_archive_worker():
|
| 249 |
"""Background worker that automatically archives memory levels"""
|
| 250 |
print("ποΈ Auto-archive worker started")
|
|
|
|
| 269 |
|
| 270 |
@app.route("/api/chat", methods=["POST"])
|
| 271 |
def chat():
|
| 272 |
+
"""Handle chat requests with automatic dataset saving"""
|
|
|
|
|
|
|
| 273 |
data = request.get_json()
|
| 274 |
query = data.get("query", "").strip()
|
| 275 |
|
|
|
|
| 297 |
"archived": False,
|
| 298 |
"level": 1,
|
| 299 |
"counter": 1,
|
| 300 |
+
"timestamp": datetime.now().isoformat(),
|
| 301 |
+
"auto_generated": False
|
| 302 |
}
|
| 303 |
|
| 304 |
+
# AUTO-SAVE: Save immediately to HuggingFace
|
| 305 |
+
if AUTO_SAVE_ENABLED and HF_TOKEN:
|
| 306 |
+
save_to_training_dataset([memory_entry])
|
| 307 |
+
saved_status = "saved"
|
| 308 |
+
else:
|
| 309 |
+
MEMORY_BUFFER.append(memory_entry)
|
| 310 |
+
saved_status = "buffered"
|
| 311 |
|
| 312 |
return jsonify({
|
| 313 |
"success": True,
|
| 314 |
"response": response,
|
| 315 |
"summary": summary,
|
| 316 |
"level": 1,
|
| 317 |
+
"buffered": len(MEMORY_BUFFER),
|
| 318 |
+
"saved": saved_status
|
| 319 |
})
|
| 320 |
|
| 321 |
@app.route("/api/dataset-stats", methods=["GET"])
|
|
|
|
| 327 |
level_1 = [d for d in training_data if d.get("level") == 1]
|
| 328 |
level_2 = [d for d in training_data if d.get("level") == 2]
|
| 329 |
level_3 = [d for d in training_data if d.get("level") == 3]
|
| 330 |
+
auto_generated = [d for d in training_data if d.get("auto_generated")]
|
| 331 |
|
| 332 |
return jsonify({
|
| 333 |
"success": True,
|
|
|
|
| 336 |
"level_2_archived": len(level_2),
|
| 337 |
"level_3_super": len(level_3),
|
| 338 |
"buffered": len(MEMORY_BUFFER),
|
| 339 |
+
"auto_generated_count": len(auto_generated),
|
| 340 |
"dataset_url": f"https://huggingface.co/datasets/{TRAINING_DATASET}"
|
| 341 |
})
|
| 342 |
except Exception as e:
|
|
|
|
| 373 |
return jsonify({"error": str(e)}), 500
|
| 374 |
|
| 375 |
if __name__ == "__main__":
|
| 376 |
+
port = int(os.getenv("PORT", 5000))
|
| 377 |
|
| 378 |
print("π Starting ContinuumGPT Hierarchical Memory System...")
|
| 379 |
print(f"π Training Dataset: {TRAINING_DATASET}")
|
|
|
|
| 384 |
print(f" Level 2 (Archived): Compressed summaries (max {LEVEL_2_MAX} before archiving)")
|
| 385 |
print(f" Level 3 (Super): Global knowledge compilation")
|
| 386 |
print(f" Auto-archiving: Every {AUTO_ARCHIVE_INTERVAL} seconds")
|
| 387 |
+
print(f" Auto-training: Every {AUTO_TRAINING_INTERVAL} seconds")
|
| 388 |
+
print(f" Auto-save: {'ENABLED' if AUTO_SAVE_ENABLED else 'DISABLED'}")
|
| 389 |
print("")
|
| 390 |
|
| 391 |
if OPENAI_API_KEY:
|
| 392 |
print("✅ OpenAI API Key Configured")
|
| 393 |
+
|
| 394 |
# Start auto-archive worker
|
| 395 |
archive_thread = threading.Thread(target=auto_archive_worker, daemon=True)
|
| 396 |
archive_thread.start()
|
| 397 |
print("✅ Auto-archive worker started")
|
| 398 |
+
|
| 399 |
+
# Start auto-training worker
|
| 400 |
+
training_thread = threading.Thread(target=auto_training_worker, daemon=True)
|
| 401 |
+
training_thread.start()
|
| 402 |
+
print("✅ Auto-training worker started - will generate examples every 5 minutes")
|
| 403 |
else:
|
| 404 |
print("⚠️ OpenAI API Key Missing - Add OPENAI_API_KEY to enable")
|
| 405 |
|
|
|
|
| 411 |
level = d.get("level", 1)
|
| 412 |
level_counts[level] = level_counts.get(level, 0) + 1
|
| 413 |
print(f"π Current dataset: L1={level_counts[1]}, L2={level_counts[2]}, L3={level_counts[3]}")
|
| 414 |
+
|
| 415 |
+
if AUTO_SAVE_ENABLED:
|
| 416 |
+
print("πΎ Auto-save ENABLED - all responses saved immediately to dataset")
|
| 417 |
else:
|
| 418 |
print("⚠️ HuggingFace Integration Disabled - Add HF_TOKEN to enable")
|
| 419 |
|
| 420 |
print("")
|
| 421 |
+
app.run(host="0.0.0.0", port=port, debug=False, threaded=True)
|