Sahil committed on
Commit
e5c94cc
Β·
verified Β·
1 Parent(s): 98ea673

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +175 -29
app.py CHANGED
@@ -1,11 +1,14 @@
 
1
  import os
2
  import json
3
  import time
 
4
  from datetime import datetime
5
  from flask import Flask, request, jsonify, send_from_directory
6
  from flask_cors import CORS
7
  from datasets import load_dataset, Dataset
8
  from openai import OpenAI
 
9
 
10
  app = Flask(__name__, static_folder=".", static_url_path="")
11
  CORS(app, supports_credentials=True)
@@ -16,9 +19,58 @@ client = OpenAI(api_key=OPENAI_API_KEY) if OPENAI_API_KEY else None
16
 
17
  # HuggingFace Configuration
18
  HF_TOKEN = os.getenv("HF_TOKEN")
19
- TRAINING_DATASET = "Sahil5112/ContinuumGPT" # Main training dataset for ContinuumGPT
20
  CONVERSATION_BUFFER = []
21
- MAX_BUFFER_SIZE = 10 # Save to HF after 10 training examples
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
  def load_training_dataset():
24
  """Load existing training data from HuggingFace"""
@@ -40,16 +92,10 @@ def save_to_training_dataset(training_examples):
40
  return False
41
 
42
  try:
43
- # Load existing data
44
  existing_data = load_training_dataset()
45
-
46
- # Add new training examples
47
  existing_data.extend(training_examples)
48
-
49
- # Create dataset and push to HF
50
  dataset = Dataset.from_list(existing_data)
51
  dataset.push_to_hub(TRAINING_DATASET, token=HF_TOKEN, private=False)
52
-
53
  print(f"βœ… Saved {len(training_examples)} training examples to {TRAINING_DATASET}")
54
  print(f"πŸ“Š Total dataset size: {len(existing_data)} examples")
55
  return True
@@ -58,7 +104,7 @@ def save_to_training_dataset(training_examples):
58
  return False
59
 
60
  def call_openai_gpt4o_mini(prompt):
61
- """Call OpenAI GPT-4o-mini - Returns dict with success/error info"""
62
  if not client:
63
  return {
64
  "success": False,
@@ -91,6 +137,100 @@ def call_openai_gpt4o_mini(prompt):
91
  "response": None
92
  }
93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  @app.route("/")
95
  def index():
96
  return send_from_directory(".", "index.html")
@@ -100,7 +240,6 @@ def generate_response():
100
  """Generate AI response using OpenAI GPT-4o-mini"""
101
  data = request.get_json()
102
  prompt = data.get("prompt", "").strip()
103
- # model = data.get("model", "puter:gpt-5-nano") # This line is no longer needed as we are defaulting to gpt-4o-mini
104
 
105
  if not prompt:
106
  return jsonify({"success": False, "error": "Missing prompt"}), 400
@@ -111,13 +250,13 @@ def generate_response():
111
  return jsonify({
112
  "success": True,
113
  "response": result["response"],
114
- "model": "gpt-4o-mini" # Explicitly set model to gpt-4o-mini
115
  })
116
  else:
117
  return jsonify({
118
  "success": False,
119
  "error": result["error"],
120
- "model": "gpt-4o-mini" # Explicitly set model to gpt-4o-mini
121
  })
122
 
123
  @app.route("/api/train", methods=["POST"])
@@ -128,12 +267,11 @@ def train_model():
128
  data = request.get_json()
129
  user_input = data.get("user_input", "").strip()
130
  ai_response = data.get("ai_response", "").strip()
131
- model_used = data.get("model_used", "gpt-4o-mini") # Default to gpt-4o-mini
132
 
133
  if not user_input or not ai_response:
134
  return jsonify({"error": "Missing user_input or ai_response"}), 400
135
 
136
- # Create training entry (model learns from this interaction)
137
  training_entry = {
138
  "input": user_input,
139
  "output": ai_response,
@@ -142,18 +280,17 @@ def train_model():
142
  "training_id": str(time.time()),
143
  "learning_score": 1.0,
144
  "is_new_learning": True,
 
145
  "context": {
146
  "query_length": len(user_input),
147
  "response_length": len(ai_response),
148
- "training_mode": "openai_gpt4o_mini", # Updated training mode
149
- "source": "openai" # Updated source
150
  }
151
  }
152
 
153
- # Add to buffer
154
  CONVERSATION_BUFFER.append(training_entry)
155
 
156
- # Auto-save when buffer is full
157
  if len(CONVERSATION_BUFFER) >= MAX_BUFFER_SIZE:
158
  save_to_training_dataset(CONVERSATION_BUFFER.copy())
159
  CONVERSATION_BUFFER.clear()
@@ -170,10 +307,10 @@ def dataset_stats():
170
  try:
171
  training_data = load_training_dataset()
172
 
173
- # Calculate stats
174
  total_examples = len(training_data)
175
  total_tokens = sum(len(d.get("input", "")) + len(d.get("output", "")) for d in training_data)
176
  models_used = {}
 
177
 
178
  for example in training_data:
179
  model = example.get("model_used", "unknown")
@@ -185,6 +322,8 @@ def dataset_stats():
185
  "total_tokens": total_tokens,
186
  "models_used": models_used,
187
  "buffered": len(CONVERSATION_BUFFER),
 
 
188
  "dataset_url": f"https://huggingface.co/datasets/{TRAINING_DATASET}"
189
  })
190
  except Exception as e:
@@ -211,23 +350,30 @@ def flush_buffer():
211
  return jsonify({"error": "Failed to flush buffer"}), 500
212
 
213
  if __name__ == "__main__":
214
- port = int(os.getenv("PORT", 7860))
215
 
216
  print("πŸš€ Starting ContinuumLearner Training Server...")
217
  print(f"πŸ“Š Training Dataset: {TRAINING_DATASET}")
218
  print(f"πŸŽ“ Dataset URL: https://huggingface.co/datasets/{TRAINING_DATASET}")
219
  print("")
220
- print("πŸ€– Training Mode: OpenAI GPT-4o-mini") # Updated training mode description
221
- print(" - Uses OpenAI GPT-4o-mini for responses") # Updated description
222
- print(" - Very low credit usage per request") # Added note about credit usage
 
 
223
  print(" - Responses are saved as training data")
224
- print(" - ContinuumGPT learns from these patterns")
225
  print("")
226
 
227
  if OPENAI_API_KEY:
228
- print("βœ… OpenAI API Key Configured") # New message for OpenAI key
 
 
 
 
229
  else:
230
- print("⚠️ OpenAI API Key Missing - Add OPENAI_API_KEY to enable") # New message for missing OpenAI key
 
231
 
232
  if HF_TOKEN:
233
  print("βœ… HuggingFace Integration Active")
@@ -235,7 +381,7 @@ if __name__ == "__main__":
235
  print(f"πŸ“š Current dataset size: {len(training_data)} training examples")
236
  else:
237
  print("⚠️ HuggingFace Integration Disabled - Add HF_TOKEN to enable")
238
- print(" - You can still use the app, but responses will show warnings")
239
- print(" - Training data won't be saved to HuggingFace")
240
 
241
- app.run(host="0.0.0.0", port=port, debug=False, threaded=True)
 
 
1
+
2
  import os
3
  import json
4
  import time
5
+ import threading
6
  from datetime import datetime
7
  from flask import Flask, request, jsonify, send_from_directory
8
  from flask_cors import CORS
9
  from datasets import load_dataset, Dataset
10
  from openai import OpenAI
11
+ import random
12
 
13
  app = Flask(__name__, static_folder=".", static_url_path="")
14
  CORS(app, supports_credentials=True)
 
19
 
20
# HuggingFace Configuration
HF_TOKEN = os.getenv("HF_TOKEN")
TRAINING_DATASET = "Sahil5112/ContinuumGPT"  # target dataset repo on the Hub
CONVERSATION_BUFFER = []   # pending training examples, flushed in batches
MAX_BUFFER_SIZE = 10       # flush to HuggingFace once this many examples accumulate

# Auto-training configuration
AUTO_TRAINING_ENABLED = True
AUTO_TRAINING_INTERVAL = 300          # 5 minutes between auto-training sessions
TRAINING_PROMPTS_PER_SESSION = 3      # number of prompts to generate per session

# Diverse training prompt templates; "{...}" slots are filled by
# generate_training_prompt() with randomly chosen values.
TRAINING_TEMPLATES = [
    # Questions
    "What is {topic}?",
    "How does {topic} work?",
    "Explain {topic} in simple terms",
    "What are the benefits of {topic}?",
    "What are common mistakes with {topic}?",

    # Technical
    "Write a Python function to {task}",
    "How do I implement {feature} in JavaScript?",
    "Debug this code: {code_snippet}",
    "Best practices for {topic}",
    "Compare {concept1} vs {concept2}",

    # Creative
    "Write a short story about {topic}",
    "Create a poem about {theme}",
    "Describe {object} creatively",

    # Analysis
    "Analyze the pros and cons of {topic}",
    "What trends are emerging in {field}?",
    "Predict the future of {technology}",

    # Practical
    "Give me tips for {activity}",
    "How can I improve my {skill}?",
    "What should I know about {subject}?"
]

# Subjects used to fill the "{topic}" slot (tech, soft skills, science, lifestyle, finance).
TRAINING_TOPICS = [
    "machine learning", "web development", "Python programming", "data science",
    "artificial intelligence", "cloud computing", "cybersecurity", "blockchain",
    "mobile apps", "APIs", "databases", "React", "Node.js", "algorithms",
    "leadership", "productivity", "time management", "communication",
    "creativity", "problem solving", "critical thinking", "decision making",
    "space exploration", "climate change", "renewable energy", "quantum computing",
    "biotechnology", "robotics", "virtual reality", "augmented reality",
    "healthy eating", "exercise", "meditation", "stress management",
    "financial planning", "investing", "entrepreneurship", "marketing"
]
74
 
75
  def load_training_dataset():
76
  """Load existing training data from HuggingFace"""
 
92
  return False
93
 
94
  try:
 
95
  existing_data = load_training_dataset()
 
 
96
  existing_data.extend(training_examples)
 
 
97
  dataset = Dataset.from_list(existing_data)
98
  dataset.push_to_hub(TRAINING_DATASET, token=HF_TOKEN, private=False)
 
99
  print(f"βœ… Saved {len(training_examples)} training examples to {TRAINING_DATASET}")
100
  print(f"πŸ“Š Total dataset size: {len(existing_data)} examples")
101
  return True
 
104
  return False
105
 
106
  def call_openai_gpt4o_mini(prompt):
107
+ """Call OpenAI GPT-4o-mini"""
108
  if not client:
109
  return {
110
  "success": False,
 
137
  "response": None
138
  }
139
 
140
def generate_training_prompt():
    """Generate a diverse training prompt.

    Picks a random template from TRAINING_TEMPLATES and substitutes every
    "{...}" placeholder with a randomly chosen value.

    Returns:
        str: a fully-filled prompt with no remaining "{...}" slots.

    Fix: the previous version only handled a subset of the placeholders
    ({topic}, {task}, {feature}, {theme}, {field}, {concept1}/{concept2}),
    so templates such as "Describe {object} creatively" or
    "Debug this code: {code_snippet}" fell through to the final else and
    were returned with the literal placeholder unfilled.
    """
    template = random.choice(TRAINING_TEMPLATES)

    # Paired placeholders must be filled together from the same pair.
    if "{concept1}" in template and "{concept2}" in template:
        pairs = [("SQL", "NoSQL"), ("REST", "GraphQL"), ("React", "Vue"), ("Python", "JavaScript")]
        first, second = random.choice(pairs)
        return template.replace("{concept1}", first).replace("{concept2}", second)

    # Single-placeholder slots -> candidate fillers. Covers every slot
    # that appears in TRAINING_TEMPLATES.
    fillers = {
        "{topic}": TRAINING_TOPICS,
        "{task}": ["sort a list", "reverse a string", "calculate factorial", "find prime numbers", "merge arrays"],
        "{feature}": ["authentication", "form validation", "API calls", "state management", "routing"],
        "{theme}": ["technology", "nature", "future", "dreams", "adventure"],
        "{field}": ["AI", "software development", "healthcare", "education", "finance"],
        "{technology}": TRAINING_TOPICS,
        "{subject}": TRAINING_TOPICS,
        "{object}": ["a sunrise", "a city at night", "an old library", "a thunderstorm", "the ocean"],
        "{activity}": ["public speaking", "remote work", "learning to code", "studying", "networking"],
        "{skill}": ["writing", "coding", "communication", "focus", "leadership"],
        "{code_snippet}": [
            "def add(a, b): return a - b",
            "for i in range(10) print(i)",
            "x = [1, 2, 3]; print(x[3])",
        ],
    }

    for placeholder, choices in fillers.items():
        if placeholder in template:
            template = template.replace(placeholder, random.choice(choices))
    return template
165
+
166
def auto_train_worker():
    """Background worker that continuously generates training data.

    Intended to run forever in a daemon thread. While AUTO_TRAINING_ENABLED
    is truthy, each cycle it:
      1. generates TRAINING_PROMPTS_PER_SESSION prompts,
      2. asks GPT-4o-mini for a response to each,
      3. appends successful pairs to CONVERSATION_BUFFER,
      4. flushes the buffer to the HuggingFace dataset once it reaches
         MAX_BUFFER_SIZE,
    then sleeps AUTO_TRAINING_INTERVAL seconds before the next cycle.

    Fix: removed the unused local `session_examples` list, which collected
    the same entries as CONVERSATION_BUFFER but was never read.
    """
    print("πŸ€– Auto-training worker started")

    while AUTO_TRAINING_ENABLED:
        try:
            if not client:
                # No OpenAI client configured yet; idle until the next cycle.
                print("⚠️ Auto-training paused - OPENAI_API_KEY not set")
                time.sleep(AUTO_TRAINING_INTERVAL)
                continue

            print(f"\nπŸŽ“ Starting auto-training session ({TRAINING_PROMPTS_PER_SESSION} prompts)...")

            for i in range(TRAINING_PROMPTS_PER_SESSION):
                # Generate a diverse prompt and fetch the model's answer.
                prompt = generate_training_prompt()
                print(f" πŸ“ Prompt {i+1}: {prompt[:60]}...")

                result = call_openai_gpt4o_mini(prompt)

                if result["success"]:
                    training_entry = {
                        "input": prompt,
                        "output": result["response"],
                        "model_used": "gpt-4o-mini",
                        "timestamp": datetime.now().isoformat(),
                        "training_id": str(time.time()),
                        "learning_score": 1.0,
                        "is_new_learning": True,
                        # Distinguishes auto-generated from user-driven examples.
                        "auto_generated": True,
                        "context": {
                            "query_length": len(prompt),
                            "response_length": len(result["response"]),
                            "training_mode": "auto_openai_gpt4o_mini",
                            "source": "auto_training"
                        }
                    }
                    CONVERSATION_BUFFER.append(training_entry)
                    print(f" βœ… Response generated ({len(result['response'])} chars)")
                else:
                    print(f" ❌ Error: {result['error']}")

                # Small delay between requests to avoid rate limits.
                time.sleep(2)

            # Flush to HuggingFace once enough examples have accumulated.
            if len(CONVERSATION_BUFFER) >= MAX_BUFFER_SIZE:
                print(f"\nπŸ’Ύ Auto-saving {len(CONVERSATION_BUFFER)} examples to HuggingFace...")
                if save_to_training_dataset(CONVERSATION_BUFFER.copy()):
                    CONVERSATION_BUFFER.clear()
                    print("βœ… Auto-save successful")
                else:
                    # Keep the buffer intact so the examples are retried.
                    print("❌ Auto-save failed (will retry next session)")

            # NOTE(review): this re-downloads the whole dataset each cycle
            # purely for the log line below — consider caching the count.
            dataset_size = len(load_training_dataset())
            print(f"πŸ“Š Dataset now contains {dataset_size} total examples")
            print(f"⏳ Next auto-training session in {AUTO_TRAINING_INTERVAL}s...\n")

        except Exception as e:
            # Never let one failed session kill the background thread.
            print(f"❌ Auto-training error: {e}")

        time.sleep(AUTO_TRAINING_INTERVAL)
233
+
234
  @app.route("/")
235
  def index():
236
  return send_from_directory(".", "index.html")
 
240
  """Generate AI response using OpenAI GPT-4o-mini"""
241
  data = request.get_json()
242
  prompt = data.get("prompt", "").strip()
 
243
 
244
  if not prompt:
245
  return jsonify({"success": False, "error": "Missing prompt"}), 400
 
250
  return jsonify({
251
  "success": True,
252
  "response": result["response"],
253
+ "model": "gpt-4o-mini"
254
  })
255
  else:
256
  return jsonify({
257
  "success": False,
258
  "error": result["error"],
259
+ "model": "gpt-4o-mini"
260
  })
261
 
262
  @app.route("/api/train", methods=["POST"])
 
267
  data = request.get_json()
268
  user_input = data.get("user_input", "").strip()
269
  ai_response = data.get("ai_response", "").strip()
270
+ model_used = data.get("model_used", "gpt-4o-mini")
271
 
272
  if not user_input or not ai_response:
273
  return jsonify({"error": "Missing user_input or ai_response"}), 400
274
 
 
275
  training_entry = {
276
  "input": user_input,
277
  "output": ai_response,
 
280
  "training_id": str(time.time()),
281
  "learning_score": 1.0,
282
  "is_new_learning": True,
283
+ "auto_generated": False,
284
  "context": {
285
  "query_length": len(user_input),
286
  "response_length": len(ai_response),
287
+ "training_mode": "manual_openai_gpt4o_mini",
288
+ "source": "user_interaction"
289
  }
290
  }
291
 
 
292
  CONVERSATION_BUFFER.append(training_entry)
293
 
 
294
  if len(CONVERSATION_BUFFER) >= MAX_BUFFER_SIZE:
295
  save_to_training_dataset(CONVERSATION_BUFFER.copy())
296
  CONVERSATION_BUFFER.clear()
 
307
  try:
308
  training_data = load_training_dataset()
309
 
 
310
  total_examples = len(training_data)
311
  total_tokens = sum(len(d.get("input", "")) + len(d.get("output", "")) for d in training_data)
312
  models_used = {}
313
+ auto_generated = sum(1 for d in training_data if d.get("auto_generated", False))
314
 
315
  for example in training_data:
316
  model = example.get("model_used", "unknown")
 
322
  "total_tokens": total_tokens,
323
  "models_used": models_used,
324
  "buffered": len(CONVERSATION_BUFFER),
325
+ "auto_generated": auto_generated,
326
+ "manual_generated": total_examples - auto_generated,
327
  "dataset_url": f"https://huggingface.co/datasets/{TRAINING_DATASET}"
328
  })
329
  except Exception as e:
 
350
  return jsonify({"error": "Failed to flush buffer"}), 500
351
 
352
  if __name__ == "__main__":
353
+ port = int(os.getenv("PORT", 5000))
354
 
355
  print("πŸš€ Starting ContinuumLearner Training Server...")
356
  print(f"πŸ“Š Training Dataset: {TRAINING_DATASET}")
357
  print(f"πŸŽ“ Dataset URL: https://huggingface.co/datasets/{TRAINING_DATASET}")
358
  print("")
359
+ print("πŸ€– Training Mode: OpenAI GPT-4o-mini with AUTO-TRAINING")
360
+ print(" - Automatic training enabled")
361
+ print(f" - Training interval: {AUTO_TRAINING_INTERVAL} seconds")
362
+ print(f" - Prompts per session: {TRAINING_PROMPTS_PER_SESSION}")
363
+ print(" - Very low credit usage per request")
364
  print(" - Responses are saved as training data")
365
+ print(" - ContinuumGPT learns continuously")
366
  print("")
367
 
368
  if OPENAI_API_KEY:
369
+ print("βœ… OpenAI API Key Configured")
370
+ # Start auto-training worker in background thread
371
+ training_thread = threading.Thread(target=auto_train_worker, daemon=True)
372
+ training_thread.start()
373
+ print("βœ… Auto-training worker started")
374
  else:
375
+ print("⚠️ OpenAI API Key Missing - Add OPENAI_API_KEY to enable")
376
+ print(" - Auto-training will be paused until API key is added")
377
 
378
  if HF_TOKEN:
379
  print("βœ… HuggingFace Integration Active")
 
381
  print(f"πŸ“š Current dataset size: {len(training_data)} training examples")
382
  else:
383
  print("⚠️ HuggingFace Integration Disabled - Add HF_TOKEN to enable")
384
+ print(" - Training data will be stored in buffer only")
 
385
 
386
+ print("")
387
+ app.run(host="0.0.0.0", port=port, debug=False, threaded=True)