Sahil committed on
Commit
d88feea
·
verified ·
1 Parent(s): c03fd2a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +105 -19
app.py CHANGED
@@ -1,7 +1,9 @@
 
1
  import os
2
  import json
3
  import time
4
  import threading
 
5
  from datetime import datetime
6
  from flask import Flask, request, jsonify, send_from_directory
7
  from flask_cors import CORS
@@ -24,10 +26,36 @@ LEVEL_1_MAX = 20 # Max entries before archiving to Level 2
24
  LEVEL_2_MAX = 50 # Max entries before archiving to Level 3
25
  AUTO_ARCHIVE_ENABLED = True
26
  AUTO_ARCHIVE_INTERVAL = 600 # 10 minutes
 
 
27
 
28
  # Memory buffer
29
  MEMORY_BUFFER = []
30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  def load_training_dataset():
32
  """Load existing training data from HuggingFace"""
33
  try:
@@ -82,7 +110,6 @@ def call_openai_gpt4o_mini(prompt):
82
  def summarize_conversation(query, response):
83
  """Create a summary of the conversation using AI"""
84
  if not client:
85
- # Fallback summary without AI
86
  return f"{query[:50]}... -> {response[:50]}..."
87
 
88
  try:
@@ -98,7 +125,7 @@ def create_super_summary(summaries):
98
  return "General knowledge compilation"
99
 
100
  try:
101
- combined = "\n".join(summaries[:10]) # Use last 10 summaries
102
  prompt = f"Create a brief summary of these conversation topics:\n{combined}"
103
  super_summary = call_openai_gpt4o_mini(prompt)
104
  return super_summary if super_summary else "General knowledge compilation"
@@ -107,8 +134,6 @@ def create_super_summary(summaries):
107
 
108
  def archive_level_1_to_level_2():
109
  """Archive Level 1 entries to Level 2 with compression"""
110
- global MEMORY_BUFFER
111
-
112
  dataset = load_training_dataset()
113
  level_1_entries = [d for d in dataset if d.get("level") == 1 and not d.get("archived")]
114
 
@@ -131,7 +156,6 @@ def archive_level_1_to_level_2():
131
  }
132
  archived_entries.append(archived_entry)
133
 
134
- # Update dataset
135
  updated_dataset = [d for d in dataset if d.get("level") != 1 or d.get("archived")]
136
  updated_dataset.extend(archived_entries)
137
 
@@ -164,7 +188,6 @@ def archive_level_2_to_level_3():
164
  "entries_compressed": len(level_2_entries)
165
  }
166
 
167
- # Keep only Level 1, Level 3, and recent Level 2 entries
168
  updated_dataset = [d for d in dataset if d.get("level") != 2]
169
  updated_dataset.append(level_3_entry)
170
 
@@ -173,6 +196,55 @@ def archive_level_2_to_level_3():
173
  dataset_obj.push_to_hub(TRAINING_DATASET, token=HF_TOKEN, private=False)
174
  print(f"βœ… Created Level 3 super-summary")
175
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176
  def auto_archive_worker():
177
  """Background worker that automatically archives memory levels"""
178
  print("πŸ—„οΈ Auto-archive worker started")
@@ -197,9 +269,7 @@ def index():
197
 
198
  @app.route("/api/chat", methods=["POST"])
199
  def chat():
200
- """Handle chat requests with hierarchical memory learning"""
201
- global MEMORY_BUFFER
202
-
203
  data = request.get_json()
204
  query = data.get("query", "").strip()
205
 
@@ -227,22 +297,25 @@ def chat():
227
  "archived": False,
228
  "level": 1,
229
  "counter": 1,
230
- "timestamp": datetime.now().isoformat()
 
231
  }
232
 
233
- MEMORY_BUFFER.append(memory_entry)
234
-
235
- # Save to HuggingFace if we have enough entries
236
- if len(MEMORY_BUFFER) >= 5:
237
- if save_to_training_dataset(MEMORY_BUFFER.copy()):
238
- MEMORY_BUFFER.clear()
 
239
 
240
  return jsonify({
241
  "success": True,
242
  "response": response,
243
  "summary": summary,
244
  "level": 1,
245
- "buffered": len(MEMORY_BUFFER)
 
246
  })
247
 
248
  @app.route("/api/dataset-stats", methods=["GET"])
@@ -254,6 +327,7 @@ def dataset_stats():
254
  level_1 = [d for d in training_data if d.get("level") == 1]
255
  level_2 = [d for d in training_data if d.get("level") == 2]
256
  level_3 = [d for d in training_data if d.get("level") == 3]
 
257
 
258
  return jsonify({
259
  "success": True,
@@ -262,6 +336,7 @@ def dataset_stats():
262
  "level_2_archived": len(level_2),
263
  "level_3_super": len(level_3),
264
  "buffered": len(MEMORY_BUFFER),
 
265
  "dataset_url": f"https://huggingface.co/datasets/{TRAINING_DATASET}"
266
  })
267
  except Exception as e:
@@ -298,7 +373,7 @@ def archive_now():
298
  return jsonify({"error": str(e)}), 500
299
 
300
  if __name__ == "__main__":
301
- port = int(os.getenv("PORT", 7860))
302
 
303
  print("πŸš€ Starting ContinuumGPT Hierarchical Memory System...")
304
  print(f"πŸ“Š Training Dataset: {TRAINING_DATASET}")
@@ -309,14 +384,22 @@ if __name__ == "__main__":
309
  print(f" Level 2 (Archived): Compressed summaries (max {LEVEL_2_MAX} before archiving)")
310
  print(f" Level 3 (Super): Global knowledge compilation")
311
  print(f" Auto-archiving: Every {AUTO_ARCHIVE_INTERVAL} seconds")
 
 
312
  print("")
313
 
314
  if OPENAI_API_KEY:
315
  print("βœ… OpenAI API Key Configured")
 
316
  # Start auto-archive worker
317
  archive_thread = threading.Thread(target=auto_archive_worker, daemon=True)
318
  archive_thread.start()
319
  print("βœ… Auto-archive worker started")
 
 
 
 
 
320
  else:
321
  print("⚠️ OpenAI API Key Missing - Add OPENAI_API_KEY to enable")
322
 
@@ -328,8 +411,11 @@ if __name__ == "__main__":
328
  level = d.get("level", 1)
329
  level_counts[level] = level_counts.get(level, 0) + 1
330
  print(f"πŸ“š Current dataset: L1={level_counts[1]}, L2={level_counts[2]}, L3={level_counts[3]}")
 
 
 
331
  else:
332
  print("⚠️ HuggingFace Integration Disabled - Add HF_TOKEN to enable")
333
 
334
  print("")
335
- app.run(host="0.0.0.0", port=port, debug=False, threaded=True)
 
1
+
2
  import os
3
  import json
4
  import time
5
  import threading
6
+ import random
7
  from datetime import datetime
8
  from flask import Flask, request, jsonify, send_from_directory
9
  from flask_cors import CORS
 
26
# --- Hierarchical-memory tuning knobs -------------------------------------
LEVEL_2_MAX = 50  # Max entries before archiving to Level 3
AUTO_ARCHIVE_ENABLED = True
AUTO_ARCHIVE_INTERVAL = 600  # 10 minutes
AUTO_TRAINING_INTERVAL = 300  # 5 minutes - auto-generate training examples
AUTO_SAVE_ENABLED = True  # Always save responses immediately

# In-memory staging area for entries awaiting a push to the training dataset
MEMORY_BUFFER = []
34
 
35
# Auto-training prompts for diverse learning: the auto-training worker picks
# one of these at random each cycle to generate a self-training example.
AUTO_TRAINING_PROMPTS = [
    "Explain quantum computing in simple terms",
    "How does machine learning work?",
    "What are the best practices for web development?",
    "Write a Python function to sort a list",
    "Explain the difference between AI and ML",
    "How do neural networks learn?",
    "What is the future of artificial intelligence?",
    "Create a creative story about space exploration",
    "Explain blockchain technology",
    "How does natural language processing work?",
    "What are design patterns in software engineering?",
    "Explain the concept of recursion with examples",
    "How do databases manage concurrent transactions?",
    "What is the difference between REST and GraphQL?",
    "Explain cloud computing and its benefits",
    "How does encryption work?",
    "What are the principles of good UI/UX design?",
    "Explain the concept of Big O notation",
    "How do search engines rank websites?",
    "What is containerization and why is it useful?"
]
58
+
59
  def load_training_dataset():
60
  """Load existing training data from HuggingFace"""
61
  try:
 
110
  def summarize_conversation(query, response):
111
  """Create a summary of the conversation using AI"""
112
  if not client:
 
113
  return f"{query[:50]}... -> {response[:50]}..."
114
 
115
  try:
 
125
  return "General knowledge compilation"
126
 
127
  try:
128
+ combined = "\n".join(summaries[:10])
129
  prompt = f"Create a brief summary of these conversation topics:\n{combined}"
130
  super_summary = call_openai_gpt4o_mini(prompt)
131
  return super_summary if super_summary else "General knowledge compilation"
 
134
 
135
  def archive_level_1_to_level_2():
136
  """Archive Level 1 entries to Level 2 with compression"""
 
 
137
  dataset = load_training_dataset()
138
  level_1_entries = [d for d in dataset if d.get("level") == 1 and not d.get("archived")]
139
 
 
156
  }
157
  archived_entries.append(archived_entry)
158
 
 
159
  updated_dataset = [d for d in dataset if d.get("level") != 1 or d.get("archived")]
160
  updated_dataset.extend(archived_entries)
161
 
 
188
  "entries_compressed": len(level_2_entries)
189
  }
190
 
 
191
  updated_dataset = [d for d in dataset if d.get("level") != 2]
192
  updated_dataset.append(level_3_entry)
193
 
 
196
  dataset_obj.push_to_hub(TRAINING_DATASET, token=HF_TOKEN, private=False)
197
  print(f"βœ… Created Level 3 super-summary")
198
 
199
def auto_training_worker():
    """Background worker that automatically generates training examples.

    Every AUTO_TRAINING_INTERVAL seconds: picks a random prompt from
    AUTO_TRAINING_PROMPTS, generates a response with the OpenAI client,
    summarizes the exchange, and saves it to the training dataset as a
    Level 1 entry flagged ``auto_generated``. Loops forever; intended to
    run as a daemon thread.

    Fixes over the previous version: a failed dataset save is now logged
    instead of silently ignored, and the per-cycle sleep lives in a single
    ``finally`` clause instead of being duplicated in every branch.
    """
    print("🤖 Auto-training worker started")
    training_count = 0

    while True:
        try:
            # Both credentials are required; skip this cycle otherwise.
            if not HF_TOKEN or not client:
                print("⚠️ Auto-training paused - need HF_TOKEN and OPENAI_API_KEY")
                continue

            # Select random training prompt
            prompt = random.choice(AUTO_TRAINING_PROMPTS)
            print(f"\n🎓 Auto-training #{training_count + 1}: '{prompt[:50]}...'")

            # Generate response
            response = call_openai_gpt4o_mini(prompt)
            if not response:
                print("❌ Failed to generate response")
                continue

            # Create summary
            summary = summarize_conversation(prompt, response)

            # Create Level 1 memory entry
            memory_entry = {
                "query": prompt,
                "response": response,
                "summary": summary,
                "archived": False,
                "level": 1,
                "counter": 1,
                "timestamp": datetime.now().isoformat(),
                "auto_generated": True,
            }

            # Save immediately to dataset
            if save_to_training_dataset([memory_entry]):
                training_count += 1
                print(f"✅ Auto-trained and saved! Total auto-training: {training_count}")
            else:
                # Previously this failure was swallowed with no trace.
                print("❌ Failed to save auto-generated entry to dataset")

        except Exception as e:
            print(f"❌ Auto-training error: {e}")
        finally:
            # Single sleep point for every path (success, skip, or error).
            time.sleep(AUTO_TRAINING_INTERVAL)
247
+
248
  def auto_archive_worker():
249
  """Background worker that automatically archives memory levels"""
250
  print("πŸ—„οΈ Auto-archive worker started")
 
269
 
270
  @app.route("/api/chat", methods=["POST"])
271
  def chat():
272
+ """Handle chat requests with automatic dataset saving"""
 
 
273
  data = request.get_json()
274
  query = data.get("query", "").strip()
275
 
 
297
  "archived": False,
298
  "level": 1,
299
  "counter": 1,
300
+ "timestamp": datetime.now().isoformat(),
301
+ "auto_generated": False
302
  }
303
 
304
+ # AUTO-SAVE: Save immediately to HuggingFace
305
+ if AUTO_SAVE_ENABLED and HF_TOKEN:
306
+ save_to_training_dataset([memory_entry])
307
+ saved_status = "saved"
308
+ else:
309
+ MEMORY_BUFFER.append(memory_entry)
310
+ saved_status = "buffered"
311
 
312
  return jsonify({
313
  "success": True,
314
  "response": response,
315
  "summary": summary,
316
  "level": 1,
317
+ "buffered": len(MEMORY_BUFFER),
318
+ "saved": saved_status
319
  })
320
 
321
  @app.route("/api/dataset-stats", methods=["GET"])
 
327
  level_1 = [d for d in training_data if d.get("level") == 1]
328
  level_2 = [d for d in training_data if d.get("level") == 2]
329
  level_3 = [d for d in training_data if d.get("level") == 3]
330
+ auto_generated = [d for d in training_data if d.get("auto_generated")]
331
 
332
  return jsonify({
333
  "success": True,
 
336
  "level_2_archived": len(level_2),
337
  "level_3_super": len(level_3),
338
  "buffered": len(MEMORY_BUFFER),
339
+ "auto_generated_count": len(auto_generated),
340
  "dataset_url": f"https://huggingface.co/datasets/{TRAINING_DATASET}"
341
  })
342
  except Exception as e:
 
373
  return jsonify({"error": str(e)}), 500
374
 
375
  if __name__ == "__main__":
376
+ port = int(os.getenv("PORT", 5000))
377
 
378
  print("πŸš€ Starting ContinuumGPT Hierarchical Memory System...")
379
  print(f"πŸ“Š Training Dataset: {TRAINING_DATASET}")
 
384
  print(f" Level 2 (Archived): Compressed summaries (max {LEVEL_2_MAX} before archiving)")
385
  print(f" Level 3 (Super): Global knowledge compilation")
386
  print(f" Auto-archiving: Every {AUTO_ARCHIVE_INTERVAL} seconds")
387
+ print(f" Auto-training: Every {AUTO_TRAINING_INTERVAL} seconds")
388
+ print(f" Auto-save: {'ENABLED' if AUTO_SAVE_ENABLED else 'DISABLED'}")
389
  print("")
390
 
391
  if OPENAI_API_KEY:
392
  print("βœ… OpenAI API Key Configured")
393
+
394
  # Start auto-archive worker
395
  archive_thread = threading.Thread(target=auto_archive_worker, daemon=True)
396
  archive_thread.start()
397
  print("βœ… Auto-archive worker started")
398
+
399
+ # Start auto-training worker
400
+ training_thread = threading.Thread(target=auto_training_worker, daemon=True)
401
+ training_thread.start()
402
+ print("βœ… Auto-training worker started - will generate examples every 5 minutes")
403
  else:
404
  print("⚠️ OpenAI API Key Missing - Add OPENAI_API_KEY to enable")
405
 
 
411
  level = d.get("level", 1)
412
  level_counts[level] = level_counts.get(level, 0) + 1
413
  print(f"πŸ“š Current dataset: L1={level_counts[1]}, L2={level_counts[2]}, L3={level_counts[3]}")
414
+
415
+ if AUTO_SAVE_ENABLED:
416
+ print("πŸ’Ύ Auto-save ENABLED - all responses saved immediately to dataset")
417
  else:
418
  print("⚠️ HuggingFace Integration Disabled - Add HF_TOKEN to enable")
419
 
420
  print("")
421
+ app.run(host="0.0.0.0", port=port, debug=False, threaded=True)