Sahil committed on
Commit
9b276f2
Β·
verified Β·
1 Parent(s): 1800769

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +199 -251
app.py CHANGED
@@ -1,4 +1,3 @@
1
-
2
  import os
3
  import json
4
  import time
@@ -8,7 +7,6 @@ from flask import Flask, request, jsonify, send_from_directory
8
  from flask_cors import CORS
9
  from datasets import load_dataset, Dataset
10
  from openai import OpenAI
11
- import random
12
 
13
  app = Flask(__name__, static_folder=".", static_url_path="")
14
  CORS(app, supports_credentials=True)
@@ -20,57 +18,15 @@ client = OpenAI(api_key=OPENAI_API_KEY) if OPENAI_API_KEY else None
20
  # HuggingFace Configuration
21
  HF_TOKEN = os.getenv("HF_TOKEN")
22
  TRAINING_DATASET = "Sahil5112/ContinuumGPT"
23
- CONVERSATION_BUFFER = []
24
- MAX_BUFFER_SIZE = 10
25
-
26
- # Auto-training configuration
27
- AUTO_TRAINING_ENABLED = True
28
- AUTO_TRAINING_INTERVAL = 300 # 5 minutes between auto-training sessions
29
- TRAINING_PROMPTS_PER_SESSION = 3 # Number of prompts to generate per session
30
-
31
- # Diverse training prompt templates
32
- TRAINING_TEMPLATES = [
33
- # Questions
34
- "What is {topic}?",
35
- "How does {topic} work?",
36
- "Explain {topic} in simple terms",
37
- "What are the benefits of {topic}?",
38
- "What are common mistakes with {topic}?",
39
-
40
- # Technical
41
- "Write a Python function to {task}",
42
- "How do I implement {feature} in JavaScript?",
43
- "Debug this code: {code_snippet}",
44
- "Best practices for {topic}",
45
- "Compare {concept1} vs {concept2}",
46
-
47
- # Creative
48
- "Write a short story about {topic}",
49
- "Create a poem about {theme}",
50
- "Describe {object} creatively",
51
-
52
- # Analysis
53
- "Analyze the pros and cons of {topic}",
54
- "What trends are emerging in {field}?",
55
- "Predict the future of {technology}",
56
-
57
- # Practical
58
- "Give me tips for {activity}",
59
- "How can I improve my {skill}?",
60
- "What should I know about {subject}?"
61
- ]
62
-
63
- TRAINING_TOPICS = [
64
- "machine learning", "web development", "Python programming", "data science",
65
- "artificial intelligence", "cloud computing", "cybersecurity", "blockchain",
66
- "mobile apps", "APIs", "databases", "React", "Node.js", "algorithms",
67
- "leadership", "productivity", "time management", "communication",
68
- "creativity", "problem solving", "critical thinking", "decision making",
69
- "space exploration", "climate change", "renewable energy", "quantum computing",
70
- "biotechnology", "robotics", "virtual reality", "augmented reality",
71
- "healthy eating", "exercise", "meditation", "stress management",
72
- "financial planning", "investing", "entrepreneurship", "marketing"
73
- ]
74
 
75
  def load_training_dataset():
76
  """Load existing training data from HuggingFace"""
@@ -96,8 +52,8 @@ def save_to_training_dataset(training_examples):
96
  existing_data.extend(training_examples)
97
  dataset = Dataset.from_list(existing_data)
98
  dataset.push_to_hub(TRAINING_DATASET, token=HF_TOKEN, private=False)
99
- print(f"βœ… Saved {len(training_examples)} training examples to {TRAINING_DATASET}")
100
- print(f"πŸ“Š Total dataset size: {len(existing_data)} examples")
101
  return True
102
  except Exception as e:
103
  print(f"❌ Error saving to dataset: {e}")
@@ -106,224 +62,206 @@ def save_to_training_dataset(training_examples):
106
  def call_openai_gpt4o_mini(prompt):
107
  """Call OpenAI GPT-4o-mini"""
108
  if not client:
109
- return {
110
- "success": False,
111
- "error": "OPENAI_API_KEY not set. Please add your OpenAI API key to enable AI model training.",
112
- "response": None
113
- }
114
 
115
  try:
116
  response = client.chat.completions.create(
117
  model="gpt-4o-mini",
118
  messages=[
119
- {"role": "system", "content": "You are a helpful AI assistant providing training data for ContinuumGPT."},
120
  {"role": "user", "content": prompt}
121
  ],
122
  temperature=0.7,
123
  max_tokens=1000
124
  )
 
 
 
 
125
 
126
- generated_text = response.choices[0].message.content
 
 
 
 
127
 
128
- return {
129
- "success": True,
130
- "error": None,
131
- "response": generated_text
132
- }
133
- except Exception as e:
134
- return {
135
- "success": False,
136
- "error": f"OpenAI API Error: {str(e)}",
137
- "response": None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  }
 
139
 
140
- def generate_training_prompt():
141
- """Generate a diverse training prompt"""
142
- template = random.choice(TRAINING_TEMPLATES)
143
-
144
- if "{topic}" in template:
145
- topic = random.choice(TRAINING_TOPICS)
146
- return template.replace("{topic}", topic)
147
- elif "{task}" in template:
148
- tasks = ["sort a list", "reverse a string", "calculate factorial", "find prime numbers", "merge arrays"]
149
- return template.replace("{task}", random.choice(tasks))
150
- elif "{feature}" in template:
151
- features = ["authentication", "form validation", "API calls", "state management", "routing"]
152
- return template.replace("{feature}", random.choice(features))
153
- elif "{theme}" in template:
154
- themes = ["technology", "nature", "future", "dreams", "adventure"]
155
- return template.replace("{theme}", random.choice(themes))
156
- elif "{field}" in template:
157
- fields = ["AI", "software development", "healthcare", "education", "finance"]
158
- return template.replace("{field}", random.choice(fields))
159
- elif "{concept1}" in template and "{concept2}" in template:
160
- pairs = [("SQL", "NoSQL"), ("REST", "GraphQL"), ("React", "Vue"), ("Python", "JavaScript")]
161
- pair = random.choice(pairs)
162
- return template.replace("{concept1}", pair[0]).replace("{concept2}", pair[1])
163
- else:
164
- return template
 
 
 
 
 
 
 
165
 
166
- def auto_train_worker():
167
- """Background worker that continuously trains the model"""
168
- print("πŸ€– Auto-training worker started")
169
-
170
- while AUTO_TRAINING_ENABLED:
 
 
 
 
 
 
 
 
 
171
  try:
172
- if not client:
173
- print("⚠️ Auto-training paused - OPENAI_API_KEY not set")
174
- time.sleep(AUTO_TRAINING_INTERVAL)
175
- continue
176
-
177
- print(f"\nπŸŽ“ Starting auto-training session ({TRAINING_PROMPTS_PER_SESSION} prompts)...")
178
- session_examples = []
179
-
180
- for i in range(TRAINING_PROMPTS_PER_SESSION):
181
- # Generate diverse prompt
182
- prompt = generate_training_prompt()
183
- print(f" πŸ“ Prompt {i+1}: {prompt[:60]}...")
184
-
185
- # Get AI response
186
- result = call_openai_gpt4o_mini(prompt)
187
-
188
- if result["success"]:
189
- # Create training entry
190
- training_entry = {
191
- "input": prompt,
192
- "output": result["response"],
193
- "model_used": "gpt-4o-mini",
194
- "timestamp": datetime.now().isoformat(),
195
- "training_id": str(time.time()),
196
- "learning_score": 1.0,
197
- "is_new_learning": True,
198
- "auto_generated": True,
199
- "context": {
200
- "query_length": len(prompt),
201
- "response_length": len(result["response"]),
202
- "training_mode": "auto_openai_gpt4o_mini",
203
- "source": "auto_training"
204
- }
205
- }
206
-
207
- session_examples.append(training_entry)
208
- CONVERSATION_BUFFER.append(training_entry)
209
- print(f" βœ… Response generated ({len(result['response'])} chars)")
210
- else:
211
- print(f" ❌ Error: {result['error']}")
212
-
213
- # Small delay between requests to avoid rate limits
214
- time.sleep(2)
215
-
216
- # Save to HuggingFace if buffer is full or session complete
217
- if len(CONVERSATION_BUFFER) >= MAX_BUFFER_SIZE:
218
- print(f"\nπŸ’Ύ Auto-saving {len(CONVERSATION_BUFFER)} examples to HuggingFace...")
219
- if save_to_training_dataset(CONVERSATION_BUFFER.copy()):
220
- CONVERSATION_BUFFER.clear()
221
- print("βœ… Auto-save successful")
222
- else:
223
- print("❌ Auto-save failed (will retry next session)")
224
-
225
- dataset_size = len(load_training_dataset())
226
- print(f"πŸ“Š Dataset now contains {dataset_size} total examples")
227
- print(f"⏳ Next auto-training session in {AUTO_TRAINING_INTERVAL}s...\n")
228
-
229
  except Exception as e:
230
- print(f"❌ Auto-training error: {e}")
231
-
232
- time.sleep(AUTO_TRAINING_INTERVAL)
233
 
234
  @app.route("/")
235
  def index():
236
  return send_from_directory(".", "index.html")
237
 
238
- @app.route("/api/generate", methods=["POST"])
239
- def generate_response():
240
- """Generate AI response using OpenAI GPT-4o-mini"""
 
 
241
  data = request.get_json()
242
- prompt = data.get("prompt", "").strip()
243
 
244
- if not prompt:
245
- return jsonify({"success": False, "error": "Missing prompt"}), 400
246
 
247
- result = call_openai_gpt4o_mini(prompt)
 
248
 
249
- if result["success"]:
250
- return jsonify({
251
- "success": True,
252
- "response": result["response"],
253
- "model": "gpt-4o-mini"
254
- })
255
- else:
256
  return jsonify({
257
  "success": False,
258
- "error": result["error"],
259
- "model": "gpt-4o-mini"
260
  })
261
 
262
- @app.route("/api/train", methods=["POST"])
263
- def train_model():
264
- """Process AI model response and save as training data"""
265
- global CONVERSATION_BUFFER
266
-
267
- data = request.get_json()
268
- user_input = data.get("user_input", "").strip()
269
- ai_response = data.get("ai_response", "").strip()
270
- model_used = data.get("model_used", "gpt-4o-mini")
271
-
272
- if not user_input or not ai_response:
273
- return jsonify({"error": "Missing user_input or ai_response"}), 400
274
-
275
- training_entry = {
276
- "input": user_input,
277
- "output": ai_response,
278
- "model_used": model_used,
279
- "timestamp": datetime.now().isoformat(),
280
- "training_id": str(time.time()),
281
- "learning_score": 1.0,
282
- "is_new_learning": True,
283
- "auto_generated": False,
284
- "context": {
285
- "query_length": len(user_input),
286
- "response_length": len(ai_response),
287
- "training_mode": "manual_openai_gpt4o_mini",
288
- "source": "user_interaction"
289
- }
290
  }
291
 
292
- CONVERSATION_BUFFER.append(training_entry)
293
 
294
- if len(CONVERSATION_BUFFER) >= MAX_BUFFER_SIZE:
295
- save_to_training_dataset(CONVERSATION_BUFFER.copy())
296
- CONVERSATION_BUFFER.clear()
 
297
 
298
  return jsonify({
299
  "success": True,
300
- "buffered": len(CONVERSATION_BUFFER),
301
- "message": f"Training example buffered ({len(CONVERSATION_BUFFER)}/{MAX_BUFFER_SIZE})"
 
 
302
  })
303
 
304
  @app.route("/api/dataset-stats", methods=["GET"])
305
  def dataset_stats():
306
- """Get statistics about the training dataset"""
307
  try:
308
  training_data = load_training_dataset()
309
 
310
- total_examples = len(training_data)
311
- total_tokens = sum(len(d.get("input", "")) + len(d.get("output", "")) for d in training_data)
312
- models_used = {}
313
- auto_generated = sum(1 for d in training_data if d.get("auto_generated", False))
314
-
315
- for example in training_data:
316
- model = example.get("model_used", "unknown")
317
- models_used[model] = models_used.get(model, 0) + 1
318
 
319
  return jsonify({
320
  "success": True,
321
- "total_examples": total_examples,
322
- "total_tokens": total_tokens,
323
- "models_used": models_used,
324
- "buffered": len(CONVERSATION_BUFFER),
325
- "auto_generated": auto_generated,
326
- "manual_generated": total_examples - auto_generated,
327
  "dataset_url": f"https://huggingface.co/datasets/{TRAINING_DATASET}"
328
  })
329
  except Exception as e:
@@ -331,57 +269,67 @@ def dataset_stats():
331
 
332
  @app.route("/api/flush-buffer", methods=["POST"])
333
  def flush_buffer():
334
- """Manually flush the training buffer to HuggingFace"""
335
- global CONVERSATION_BUFFER
336
 
337
- if not CONVERSATION_BUFFER:
338
  return jsonify({"message": "Buffer is empty, nothing to flush"})
339
 
340
- success = save_to_training_dataset(CONVERSATION_BUFFER.copy())
341
- count = len(CONVERSATION_BUFFER)
342
- CONVERSATION_BUFFER.clear()
343
 
344
  if success:
345
  return jsonify({
346
  "success": True,
347
- "message": f"Flushed {count} training examples to HuggingFace"
348
  })
349
  else:
350
  return jsonify({"error": "Failed to flush buffer"}), 500
351
 
 
 
 
 
 
 
 
 
 
 
352
  if __name__ == "__main__":
353
- port = int(os.getenv("PORT", 7860))
354
 
355
- print("πŸš€ Starting ContinuumLearner Training Server...")
356
  print(f"πŸ“Š Training Dataset: {TRAINING_DATASET}")
357
  print(f"πŸŽ“ Dataset URL: https://huggingface.co/datasets/{TRAINING_DATASET}")
358
  print("")
359
- print("πŸ€– Training Mode: OpenAI GPT-4o-mini with AUTO-TRAINING")
360
- print(" - Automatic training enabled")
361
- print(f" - Training interval: {AUTO_TRAINING_INTERVAL} seconds")
362
- print(f" - Prompts per session: {TRAINING_PROMPTS_PER_SESSION}")
363
- print(" - Very low credit usage per request")
364
- print(" - Responses are saved as training data")
365
- print(" - ContinuumGPT learns continuously")
366
  print("")
367
 
368
  if OPENAI_API_KEY:
369
  print("βœ… OpenAI API Key Configured")
370
- # Start auto-training worker in background thread
371
- training_thread = threading.Thread(target=auto_train_worker, daemon=True)
372
- training_thread.start()
373
- print("βœ… Auto-training worker started")
374
  else:
375
  print("⚠️ OpenAI API Key Missing - Add OPENAI_API_KEY to enable")
376
- print(" - Auto-training will be paused until API key is added")
377
 
378
  if HF_TOKEN:
379
  print("βœ… HuggingFace Integration Active")
380
  training_data = load_training_dataset()
381
- print(f"πŸ“š Current dataset size: {len(training_data)} training examples")
 
 
 
 
382
  else:
383
  print("⚠️ HuggingFace Integration Disabled - Add HF_TOKEN to enable")
384
- print(" - Training data will be stored in buffer only")
385
 
386
  print("")
387
- app.run(host="0.0.0.0", port=port, debug=False, threaded=True)
 
 
1
  import os
2
  import json
3
  import time
 
7
  from flask_cors import CORS
8
  from datasets import load_dataset, Dataset
9
  from openai import OpenAI
 
10
 
11
  app = Flask(__name__, static_folder=".", static_url_path="")
12
  CORS(app, supports_credentials=True)
 
18
  # HuggingFace Configuration
19
  HF_TOKEN = os.getenv("HF_TOKEN")
20
  TRAINING_DATASET = "Sahil5112/ContinuumGPT"
21
+
22
+ # Hierarchical Memory Configuration
23
+ LEVEL_1_MAX = 20 # Max entries before archiving to Level 2
24
+ LEVEL_2_MAX = 50 # Max entries before archiving to Level 3
25
+ AUTO_ARCHIVE_ENABLED = True
26
+ AUTO_ARCHIVE_INTERVAL = 600 # 10 minutes
27
+
28
+ # Memory buffer
29
+ MEMORY_BUFFER = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
  def load_training_dataset():
32
  """Load existing training data from HuggingFace"""
 
52
  existing_data.extend(training_examples)
53
  dataset = Dataset.from_list(existing_data)
54
  dataset.push_to_hub(TRAINING_DATASET, token=HF_TOKEN, private=False)
55
+ print(f"βœ… Saved {len(training_examples)} entries to {TRAINING_DATASET}")
56
+ print(f"πŸ“Š Total dataset size: {len(existing_data)} entries")
57
  return True
58
  except Exception as e:
59
  print(f"❌ Error saving to dataset: {e}")
 
62
def call_openai_gpt4o_mini(prompt):
    """Send *prompt* to OpenAI GPT-4o-mini and return the reply text.

    Returns None when no API client is configured or when the request
    fails; failures are logged to stdout rather than raised.
    """
    # Without an API key there is no client object at all.
    if not client:
        return None

    chat_messages = [
        {"role": "system", "content": "You are ContinuumGPT, a helpful AI assistant."},
        {"role": "user", "content": prompt},
    ]
    try:
        completion = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=chat_messages,
            temperature=0.7,
            max_tokens=1000,
        )
        reply = completion.choices[0].message.content
    except Exception as e:
        # Best-effort: log and signal failure with None instead of raising.
        print(f"OpenAI API Error: {e}")
        return None
    return reply
81
 
82
def summarize_conversation(query, response):
    """Create a one-sentence summary of a query/response exchange.

    Uses GPT-4o-mini when a client is configured; otherwise (or on any
    failure) falls back to a cheap truncation-based summary string.
    """
    if not client:
        # Fallback summary without AI
        return f"{query[:50]}... -> {response[:50]}..."

    try:
        summary_prompt = f"Summarize this conversation in one sentence:\nUser: {query}\nAI: {response}"
        summary = call_openai_gpt4o_mini(summary_prompt)
        return summary if summary else f"Q&A about {query[:30]}..."
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate instead of being swallowed.
        return f"Q&A about {query[:30]}..."
94
+
95
def create_super_summary(summaries):
    """Compress a list of per-conversation summaries into one global summary.

    Returns a fixed placeholder when no client or no summaries are
    available, or when the API call fails.
    """
    if not client or not summaries:
        return "General knowledge compilation"

    try:
        # NOTE: this takes the FIRST 10 summaries (the old comment said
        # "last 10", which the slice never did).
        combined = "\n".join(summaries[:10])
        prompt = f"Create a brief summary of these conversation topics:\n{combined}"
        super_summary = call_openai_gpt4o_mini(prompt)
        return super_summary if super_summary else "General knowledge compilation"
    except Exception:
        # Narrowed from a bare `except:`; still best-effort.
        return "General knowledge compilation"
107
+
108
def archive_level_1_to_level_2():
    """Compress all fresh Level 1 entries into archived Level 2 entries.

    Runs only when the number of un-archived Level 1 entries exceeds
    LEVEL_1_MAX. The rewritten dataset is pushed back to HuggingFace
    (only when HF_TOKEN is available).

    Fix: removed an unused `global MEMORY_BUFFER` declaration — this
    function never reads or writes the buffer.
    """
    dataset = load_training_dataset()
    level_1_entries = [d for d in dataset if d.get("level") == 1 and not d.get("archived")]

    # Nothing to do until the fresh tier overflows.
    if len(level_1_entries) <= LEVEL_1_MAX:
        return

    print(f"πŸ“¦ Archiving {len(level_1_entries)} Level 1 entries to Level 2...")

    archived_entries = []
    for entry in level_1_entries:
        archived_entry = {
            "query": entry.get("query"),
            "response": entry.get("response"),
            "summary": entry.get("summary"),
            "archived": True,
            "level": 2,
            # Bumped each time an entry moves tiers; defaults to 1 for
            # entries written before the counter field existed.
            "counter": entry.get("counter", 1) + 1,
            "timestamp": datetime.now().isoformat(),
            "original_timestamp": entry.get("timestamp")
        }
        archived_entries.append(archived_entry)

    # Drop the fresh Level 1 entries that were just archived and append
    # their Level 2 replacements; everything else passes through.
    updated_dataset = [d for d in dataset if d.get("level") != 1 or d.get("archived")]
    updated_dataset.extend(archived_entries)

    if HF_TOKEN:
        dataset_obj = Dataset.from_list(updated_dataset)
        dataset_obj.push_to_hub(TRAINING_DATASET, token=HF_TOKEN, private=False)
        print(f"βœ… Archived to Level 2: {len(archived_entries)} entries")
142
+
143
def archive_level_2_to_level_3():
    """Collapse all Level 2 entries into a single Level 3 super-summary.

    Runs only when the Level 2 tier exceeds LEVEL_2_MAX. Every Level 2
    entry is removed from the dataset and replaced by one compressed
    Level 3 record built from their summaries.
    """
    dataset = load_training_dataset()
    level_2_entries = [d for d in dataset if d.get("level") == 2]

    if len(level_2_entries) <= LEVEL_2_MAX:
        return

    print(f"πŸ—œοΈ Creating Level 3 super-summary from {len(level_2_entries)} Level 2 entries...")

    summaries = [d.get("summary", "") for d in level_2_entries if d.get("summary")]
    super_summary = create_super_summary(summaries)

    level_3_entry = {
        "query": "Global Knowledge Archive",
        "response": super_summary,
        "summary": super_summary,
        "archived": True,
        "level": 3,
        "counter": len(level_2_entries),
        "timestamp": datetime.now().isoformat(),
        "entries_compressed": len(level_2_entries)
    }

    # ALL Level 2 entries are removed here. (The previous comment claimed
    # "recent Level 2 entries" were kept, which this filter never did.)
    updated_dataset = [d for d in dataset if d.get("level") != 2]
    updated_dataset.append(level_3_entry)

    if HF_TOKEN:
        dataset_obj = Dataset.from_list(updated_dataset)
        dataset_obj.push_to_hub(TRAINING_DATASET, token=HF_TOKEN, private=False)
        print("βœ… Created Level 3 super-summary")
175
+
176
def auto_archive_worker():
    """Daemon loop: periodically run the Level 1→2 and Level 2→3 archive passes."""
    print("πŸ—„οΈ Auto-archive worker started")

    while AUTO_ARCHIVE_ENABLED:
        try:
            if not (HF_TOKEN and client):
                # Both HF write access and an OpenAI client are required.
                print("⚠️ Auto-archive paused - need HF_TOKEN and OPENAI_API_KEY")
            else:
                print("\nπŸ”„ Running auto-archive check...")
                archive_level_1_to_level_2()
                archive_level_2_to_level_3()
                print(f"⏳ Next archive check in {AUTO_ARCHIVE_INTERVAL}s...\n")
        except Exception as err:
            # Keep the worker alive regardless of what an archive pass throws.
            print(f"❌ Auto-archive error: {err}")

        time.sleep(AUTO_ARCHIVE_INTERVAL)
193
 
194
@app.route("/")
def index():
    """Serve the single-page frontend from the app directory."""
    page = send_from_directory(".", "index.html")
    return page
197
 
198
@app.route("/api/chat", methods=["POST"])
def chat():
    """Handle a chat request and record it as a Level 1 memory entry.

    Flow: answer the query with GPT-4o-mini, summarize the exchange,
    stage the entry in MEMORY_BUFFER, and push the buffer to HuggingFace
    once it holds enough entries.
    """
    # silent=True: a missing or malformed JSON body yields None instead of
    # raising mid-handler (the old code crashed with AttributeError on
    # non-JSON requests); the explicit check below returns a clean 400.
    data = request.get_json(silent=True) or {}
    query = data.get("query", "").strip()

    if not query:
        return jsonify({"success": False, "error": "Missing query"}), 400

    # Generate response
    response = call_openai_gpt4o_mini(query)

    if not response:
        # Distinguish "no API key configured" from a transient API failure
        # (the old code blamed the key in both cases).
        if not client:
            error_msg = "OPENAI_API_KEY not set. Please add your OpenAI API key to enable AI."
        else:
            error_msg = "AI request failed. Please try again."
        return jsonify({
            "success": False,
            "error": error_msg,
            "response": None
        })

    # Create summary
    summary = summarize_conversation(query, response)

    # Create Level 1 memory entry
    memory_entry = {
        "query": query,
        "response": response,
        "summary": summary,
        "archived": False,
        "level": 1,
        "counter": 1,
        "timestamp": datetime.now().isoformat()
    }

    # In-place append/clear only, so no `global` declaration is needed.
    MEMORY_BUFFER.append(memory_entry)

    # Save to HuggingFace once we have enough entries; on failure the
    # buffer is kept so nothing is lost.
    if len(MEMORY_BUFFER) >= 5:
        if save_to_training_dataset(MEMORY_BUFFER.copy()):
            MEMORY_BUFFER.clear()

    return jsonify({
        "success": True,
        "response": response,
        "summary": summary,
        "level": 1,
        "buffered": len(MEMORY_BUFFER)
    })
247
 
248
  @app.route("/api/dataset-stats", methods=["GET"])
249
  def dataset_stats():
250
+ """Get statistics about the hierarchical memory dataset"""
251
  try:
252
  training_data = load_training_dataset()
253
 
254
+ level_1 = [d for d in training_data if d.get("level") == 1]
255
+ level_2 = [d for d in training_data if d.get("level") == 2]
256
+ level_3 = [d for d in training_data if d.get("level") == 3]
 
 
 
 
 
257
 
258
  return jsonify({
259
  "success": True,
260
+ "total_entries": len(training_data),
261
+ "level_1_fresh": len([d for d in level_1 if not d.get("archived")]),
262
+ "level_2_archived": len(level_2),
263
+ "level_3_super": len(level_3),
264
+ "buffered": len(MEMORY_BUFFER),
 
265
  "dataset_url": f"https://huggingface.co/datasets/{TRAINING_DATASET}"
266
  })
267
  except Exception as e:
 
269
 
270
@app.route("/api/flush-buffer", methods=["POST"])
def flush_buffer():
    """Manually flush the memory buffer to HuggingFace.

    Fix: the buffer is now cleared only AFTER a successful push. The old
    code cleared it unconditionally, so a failed push silently discarded
    every buffered entry.
    """
    if not MEMORY_BUFFER:
        return jsonify({"message": "Buffer is empty, nothing to flush"})

    count = len(MEMORY_BUFFER)
    success = save_to_training_dataset(MEMORY_BUFFER.copy())

    if success:
        # Safe to drop local copies now that they are persisted.
        MEMORY_BUFFER.clear()
        return jsonify({
            "success": True,
            "message": f"Flushed {count} entries to HuggingFace"
        })
    else:
        return jsonify({"error": "Failed to flush buffer"}), 500
289
 
290
@app.route("/api/archive-now", methods=["POST"])
def archive_now():
    """Run both archive passes on demand (Level 1→2, then Level 2→3)."""
    try:
        # Order matters: Level 1 must be compacted before Level 2 overflow
        # is evaluated.
        for archive_pass in (archive_level_1_to_level_2, archive_level_2_to_level_3):
            archive_pass()
    except Exception as e:
        return jsonify({"error": str(e)}), 500
    return jsonify({"success": True, "message": "Archiving completed"})
299
+
300
if __name__ == "__main__":
    # Port is overridable via the environment for hosted deployments.
    port = int(os.getenv("PORT", 5000))

    print("πŸš€ Starting ContinuumGPT Hierarchical Memory System...")
    print(f"πŸ“Š Training Dataset: {TRAINING_DATASET}")
    print(f"πŸŽ“ Dataset URL: https://huggingface.co/datasets/{TRAINING_DATASET}")
    print("")
    print("🧠 Hierarchical Memory Architecture:")
    print(f" Level 1 (Fresh): Detailed Q&A (max {LEVEL_1_MAX} before archiving)")
    print(f" Level 2 (Archived): Compressed summaries (max {LEVEL_2_MAX} before archiving)")
    print(" Level 3 (Super): Global knowledge compilation")
    print(f" Auto-archiving: Every {AUTO_ARCHIVE_INTERVAL} seconds")
    print("")

    if OPENAI_API_KEY:
        print("βœ… OpenAI API Key Configured")
        # Daemon thread: dies with the main process, no join needed.
        worker_thread = threading.Thread(target=auto_archive_worker, daemon=True)
        worker_thread.start()
        print("βœ… Auto-archive worker started")
    else:
        print("⚠️ OpenAI API Key Missing - Add OPENAI_API_KEY to enable")

    if HF_TOKEN:
        print("βœ… HuggingFace Integration Active")
        stored_entries = load_training_dataset()
        counts = {lvl: 0 for lvl in (1, 2, 3)}
        for entry in stored_entries:
            lvl = entry.get("level", 1)
            counts[lvl] = counts.get(lvl, 0) + 1
        print(f"πŸ“š Current dataset: L1={counts[1]}, L2={counts[2]}, L3={counts[3]}")
    else:
        print("⚠️ HuggingFace Integration Disabled - Add HF_TOKEN to enable")

    print("")
    app.run(host="0.0.0.0", port=port, debug=False, threaded=True)