Sahil committed on
Commit
e5c94cc
Β·
verified Β·
1 Parent(s): 98ea673

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +175 -29
app.py CHANGED
@@ -1,11 +1,14 @@
 
1
  import os
2
  import json
3
  import time
 
4
  from datetime import datetime
5
  from flask import Flask, request, jsonify, send_from_directory
6
  from flask_cors import CORS
7
  from datasets import load_dataset, Dataset
8
  from openai import OpenAI
 
9
 
10
  app = Flask(__name__, static_folder=".", static_url_path="")
11
  CORS(app, supports_credentials=True)
@@ -16,9 +19,58 @@ client = OpenAI(api_key=OPENAI_API_KEY) if OPENAI_API_KEY else None
16
 
17
  # HuggingFace Configuration
18
  HF_TOKEN = os.getenv("HF_TOKEN")
19
- TRAINING_DATASET = "Sahil5112/ContinuumGPT" # Main training dataset for ContinuumGPT
20
  CONVERSATION_BUFFER = []
21
- MAX_BUFFER_SIZE = 10 # Save to HF after 10 training examples
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
  def load_training_dataset():
24
  """Load existing training data from HuggingFace"""
@@ -40,16 +92,10 @@ def save_to_training_dataset(training_examples):
40
  return False
41
 
42
  try:
43
- # Load existing data
44
  existing_data = load_training_dataset()
45
-
46
- # Add new training examples
47
  existing_data.extend(training_examples)
48
-
49
- # Create dataset and push to HF
50
  dataset = Dataset.from_list(existing_data)
51
  dataset.push_to_hub(TRAINING_DATASET, token=HF_TOKEN, private=False)
52
-
53
  print(f"βœ… Saved {len(training_examples)} training examples to {TRAINING_DATASET}")
54
  print(f"πŸ“Š Total dataset size: {len(existing_data)} examples")
55
  return True
@@ -58,7 +104,7 @@ def save_to_training_dataset(training_examples):
58
  return False
59
 
60
  def call_openai_gpt4o_mini(prompt):
61
- """Call OpenAI GPT-4o-mini - Returns dict with success/error info"""
62
  if not client:
63
  return {
64
  "success": False,
@@ -91,6 +137,100 @@ def call_openai_gpt4o_mini(prompt):
91
  "response": None
92
  }
93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  @app.route("/")
95
  def index():
96
  return send_from_directory(".", "index.html")
@@ -100,7 +240,6 @@ def generate_response():
100
  """Generate AI response using OpenAI GPT-4o-mini"""
101
  data = request.get_json()
102
  prompt = data.get("prompt", "").strip()
103
- # model = data.get("model", "puter:gpt-5-nano") # This line is no longer needed as we are defaulting to gpt-4o-mini
104
 
105
  if not prompt:
106
  return jsonify({"success": False, "error": "Missing prompt"}), 400
@@ -111,13 +250,13 @@ def generate_response():
111
  return jsonify({
112
  "success": True,
113
  "response": result["response"],
114
- "model": "gpt-4o-mini" # Explicitly set model to gpt-4o-mini
115
  })
116
  else:
117
  return jsonify({
118
  "success": False,
119
  "error": result["error"],
120
- "model": "gpt-4o-mini" # Explicitly set model to gpt-4o-mini
121
  })
122
 
123
  @app.route("/api/train", methods=["POST"])
@@ -128,12 +267,11 @@ def train_model():
128
  data = request.get_json()
129
  user_input = data.get("user_input", "").strip()
130
  ai_response = data.get("ai_response", "").strip()
131
- model_used = data.get("model_used", "gpt-4o-mini") # Default to gpt-4o-mini
132
 
133
  if not user_input or not ai_response:
134
  return jsonify({"error": "Missing user_input or ai_response"}), 400
135
 
136
- # Create training entry (model learns from this interaction)
137
  training_entry = {
138
  "input": user_input,
139
  "output": ai_response,
@@ -142,18 +280,17 @@ def train_model():
142
  "training_id": str(time.time()),
143
  "learning_score": 1.0,
144
  "is_new_learning": True,
 
145
  "context": {
146
  "query_length": len(user_input),
147
  "response_length": len(ai_response),
148
- "training_mode": "openai_gpt4o_mini", # Updated training mode
149
- "source": "openai" # Updated source
150
  }
151
  }
152
 
153
- # Add to buffer
154
  CONVERSATION_BUFFER.append(training_entry)
155
 
156
- # Auto-save when buffer is full
157
  if len(CONVERSATION_BUFFER) >= MAX_BUFFER_SIZE:
158
  save_to_training_dataset(CONVERSATION_BUFFER.copy())
159
  CONVERSATION_BUFFER.clear()
@@ -170,10 +307,10 @@ def dataset_stats():
170
  try:
171
  training_data = load_training_dataset()
172
 
173
- # Calculate stats
174
  total_examples = len(training_data)
175
  total_tokens = sum(len(d.get("input", "")) + len(d.get("output", "")) for d in training_data)
176
  models_used = {}
 
177
 
178
  for example in training_data:
179
  model = example.get("model_used", "unknown")
@@ -185,6 +322,8 @@ def dataset_stats():
185
  "total_tokens": total_tokens,
186
  "models_used": models_used,
187
  "buffered": len(CONVERSATION_BUFFER),
 
 
188
  "dataset_url": f"https://huggingface.co/datasets/{TRAINING_DATASET}"
189
  })
190
  except Exception as e:
@@ -211,23 +350,30 @@ def flush_buffer():
211
  return jsonify({"error": "Failed to flush buffer"}), 500
212
 
213
  if __name__ == "__main__":
214
- port = int(os.getenv("PORT", 7860))
215
 
216
  print("πŸš€ Starting ContinuumLearner Training Server...")
217
  print(f"πŸ“Š Training Dataset: {TRAINING_DATASET}")
218
  print(f"πŸŽ“ Dataset URL: https://huggingface.co/datasets/{TRAINING_DATASET}")
219
  print("")
220
- print("πŸ€– Training Mode: OpenAI GPT-4o-mini") # Updated training mode description
221
- print(" - Uses OpenAI GPT-4o-mini for responses") # Updated description
222
- print(" - Very low credit usage per request") # Added note about credit usage
 
 
223
  print(" - Responses are saved as training data")
224
- print(" - ContinuumGPT learns from these patterns")
225
  print("")
226
 
227
  if OPENAI_API_KEY:
228
- print("βœ… OpenAI API Key Configured") # New message for OpenAI key
 
 
 
 
229
  else:
230
- print("⚠️ OpenAI API Key Missing - Add OPENAI_API_KEY to enable") # New message for missing OpenAI key
 
231
 
232
  if HF_TOKEN:
233
  print("βœ… HuggingFace Integration Active")
@@ -235,7 +381,7 @@ if __name__ == "__main__":
235
  print(f"πŸ“š Current dataset size: {len(training_data)} training examples")
236
  else:
237
  print("⚠️ HuggingFace Integration Disabled - Add HF_TOKEN to enable")
238
- print(" - You can still use the app, but responses will show warnings")
239
- print(" - Training data won't be saved to HuggingFace")
240
 
241
- app.run(host="0.0.0.0", port=port, debug=False, threaded=True)
 
 
1
+
2
  import os
3
  import json
4
  import time
5
+ import threading
6
  from datetime import datetime
7
  from flask import Flask, request, jsonify, send_from_directory
8
  from flask_cors import CORS
9
  from datasets import load_dataset, Dataset
10
  from openai import OpenAI
11
+ import random
12
 
13
  app = Flask(__name__, static_folder=".", static_url_path="")
14
  CORS(app, supports_credentials=True)
 
19
 
20
# HuggingFace Configuration
HF_TOKEN = os.getenv("HF_TOKEN")
TRAINING_DATASET = "Sahil5112/ContinuumGPT"  # target dataset repo on the Hub
CONVERSATION_BUFFER = []   # pending training examples, flushed in batches
MAX_BUFFER_SIZE = 10       # flush to HuggingFace once this many examples accumulate

# Auto-training configuration
AUTO_TRAINING_ENABLED = True
AUTO_TRAINING_INTERVAL = 300          # 5 minutes between auto-training sessions
TRAINING_PROMPTS_PER_SESSION = 3      # number of prompts to generate per session

# Diverse training prompt templates; "{...}" slots are filled by
# generate_training_prompt() with randomly chosen values.
TRAINING_TEMPLATES = [
    # Questions
    "What is {topic}?",
    "How does {topic} work?",
    "Explain {topic} in simple terms",
    "What are the benefits of {topic}?",
    "What are common mistakes with {topic}?",

    # Technical
    "Write a Python function to {task}",
    "How do I implement {feature} in JavaScript?",
    "Debug this code: {code_snippet}",
    "Best practices for {topic}",
    "Compare {concept1} vs {concept2}",

    # Creative
    "Write a short story about {topic}",
    "Create a poem about {theme}",
    "Describe {object} creatively",

    # Analysis
    "Analyze the pros and cons of {topic}",
    "What trends are emerging in {field}?",
    "Predict the future of {technology}",

    # Practical
    "Give me tips for {activity}",
    "How can I improve my {skill}?",
    "What should I know about {subject}?"
]

# Subjects used to fill the "{topic}" slot (tech, soft skills, science, lifestyle, finance).
TRAINING_TOPICS = [
    "machine learning", "web development", "Python programming", "data science",
    "artificial intelligence", "cloud computing", "cybersecurity", "blockchain",
    "mobile apps", "APIs", "databases", "React", "Node.js", "algorithms",
    "leadership", "productivity", "time management", "communication",
    "creativity", "problem solving", "critical thinking", "decision making",
    "space exploration", "climate change", "renewable energy", "quantum computing",
    "biotechnology", "robotics", "virtual reality", "augmented reality",
    "healthy eating", "exercise", "meditation", "stress management",
    "financial planning", "investing", "entrepreneurship", "marketing"
]
74
 
75
  def load_training_dataset():
76
  """Load existing training data from HuggingFace"""
 
92
  return False
93
 
94
  try:
 
95
  existing_data = load_training_dataset()
 
 
96
  existing_data.extend(training_examples)
 
 
97
  dataset = Dataset.from_list(existing_data)
98
  dataset.push_to_hub(TRAINING_DATASET, token=HF_TOKEN, private=False)
 
99
  print(f"βœ… Saved {len(training_examples)} training examples to {TRAINING_DATASET}")
100
  print(f"πŸ“Š Total dataset size: {len(existing_data)} examples")
101
  return True
 
104
  return False
105
 
106
  def call_openai_gpt4o_mini(prompt):
107
+ """Call OpenAI GPT-4o-mini"""
108
  if not client:
109
  return {
110
  "success": False,
 
137
  "response": None
138
  }
139
 
140
def generate_training_prompt():
    """Generate a diverse training prompt.

    Picks a random template from TRAINING_TEMPLATES and substitutes every
    "{...}" placeholder with a randomly chosen value.

    Returns:
        str: a fully-filled prompt with no remaining "{...}" slots.

    Fix: the previous version only handled a subset of the placeholders
    ({topic}, {task}, {feature}, {theme}, {field}, {concept1}/{concept2}),
    so templates such as "Describe {object} creatively" or
    "Debug this code: {code_snippet}" fell through to the final else and
    were returned with the literal placeholder unfilled.
    """
    template = random.choice(TRAINING_TEMPLATES)

    # Paired placeholders must be filled together from the same pair.
    if "{concept1}" in template and "{concept2}" in template:
        pairs = [("SQL", "NoSQL"), ("REST", "GraphQL"), ("React", "Vue"), ("Python", "JavaScript")]
        first, second = random.choice(pairs)
        return template.replace("{concept1}", first).replace("{concept2}", second)

    # Single-placeholder slots -> candidate fillers. Covers every slot
    # that appears in TRAINING_TEMPLATES.
    fillers = {
        "{topic}": TRAINING_TOPICS,
        "{task}": ["sort a list", "reverse a string", "calculate factorial", "find prime numbers", "merge arrays"],
        "{feature}": ["authentication", "form validation", "API calls", "state management", "routing"],
        "{theme}": ["technology", "nature", "future", "dreams", "adventure"],
        "{field}": ["AI", "software development", "healthcare", "education", "finance"],
        "{technology}": TRAINING_TOPICS,
        "{subject}": TRAINING_TOPICS,
        "{object}": ["a sunrise", "a city at night", "an old library", "a thunderstorm", "the ocean"],
        "{activity}": ["public speaking", "remote work", "learning to code", "studying", "networking"],
        "{skill}": ["writing", "coding", "communication", "focus", "leadership"],
        "{code_snippet}": [
            "def add(a, b): return a - b",
            "for i in range(10) print(i)",
            "x = [1, 2, 3]; print(x[3])",
        ],
    }

    for placeholder, choices in fillers.items():
        if placeholder in template:
            template = template.replace(placeholder, random.choice(choices))
    return template
165
+
166
def auto_train_worker():
    """Background worker that continuously generates training data.

    Intended to run forever in a daemon thread. While AUTO_TRAINING_ENABLED
    is truthy, each cycle it:
      1. generates TRAINING_PROMPTS_PER_SESSION prompts,
      2. asks GPT-4o-mini for a response to each,
      3. appends successful pairs to CONVERSATION_BUFFER,
      4. flushes the buffer to the HuggingFace dataset once it reaches
         MAX_BUFFER_SIZE,
    then sleeps AUTO_TRAINING_INTERVAL seconds before the next cycle.

    Fix: removed the unused local `session_examples` list, which collected
    the same entries as CONVERSATION_BUFFER but was never read.
    """
    print("πŸ€– Auto-training worker started")

    while AUTO_TRAINING_ENABLED:
        try:
            if not client:
                # No OpenAI client configured yet; idle until the next cycle.
                print("⚠️ Auto-training paused - OPENAI_API_KEY not set")
                time.sleep(AUTO_TRAINING_INTERVAL)
                continue

            print(f"\nπŸŽ“ Starting auto-training session ({TRAINING_PROMPTS_PER_SESSION} prompts)...")

            for i in range(TRAINING_PROMPTS_PER_SESSION):
                # Generate a diverse prompt and fetch the model's answer.
                prompt = generate_training_prompt()
                print(f" πŸ“ Prompt {i+1}: {prompt[:60]}...")

                result = call_openai_gpt4o_mini(prompt)

                if result["success"]:
                    training_entry = {
                        "input": prompt,
                        "output": result["response"],
                        "model_used": "gpt-4o-mini",
                        "timestamp": datetime.now().isoformat(),
                        "training_id": str(time.time()),
                        "learning_score": 1.0,
                        "is_new_learning": True,
                        # Distinguishes auto-generated from user-driven examples.
                        "auto_generated": True,
                        "context": {
                            "query_length": len(prompt),
                            "response_length": len(result["response"]),
                            "training_mode": "auto_openai_gpt4o_mini",
                            "source": "auto_training"
                        }
                    }
                    CONVERSATION_BUFFER.append(training_entry)
                    print(f" βœ… Response generated ({len(result['response'])} chars)")
                else:
                    print(f" ❌ Error: {result['error']}")

                # Small delay between requests to avoid rate limits.
                time.sleep(2)

            # Flush to HuggingFace once enough examples have accumulated.
            if len(CONVERSATION_BUFFER) >= MAX_BUFFER_SIZE:
                print(f"\nπŸ’Ύ Auto-saving {len(CONVERSATION_BUFFER)} examples to HuggingFace...")
                if save_to_training_dataset(CONVERSATION_BUFFER.copy()):
                    CONVERSATION_BUFFER.clear()
                    print("βœ… Auto-save successful")
                else:
                    # Keep the buffer intact so the examples are retried.
                    print("❌ Auto-save failed (will retry next session)")

            # NOTE(review): this re-downloads the whole dataset each cycle
            # purely for the log line below — consider caching the count.
            dataset_size = len(load_training_dataset())
            print(f"πŸ“Š Dataset now contains {dataset_size} total examples")
            print(f"⏳ Next auto-training session in {AUTO_TRAINING_INTERVAL}s...\n")

        except Exception as e:
            # Never let one failed session kill the background thread.
            print(f"❌ Auto-training error: {e}")

        time.sleep(AUTO_TRAINING_INTERVAL)
233
+
234
  @app.route("/")
235
  def index():
236
  return send_from_directory(".", "index.html")
 
240
  """Generate AI response using OpenAI GPT-4o-mini"""
241
  data = request.get_json()
242
  prompt = data.get("prompt", "").strip()
 
243
 
244
  if not prompt:
245
  return jsonify({"success": False, "error": "Missing prompt"}), 400
 
250
  return jsonify({
251
  "success": True,
252
  "response": result["response"],
253
+ "model": "gpt-4o-mini"
254
  })
255
  else:
256
  return jsonify({
257
  "success": False,
258
  "error": result["error"],
259
+ "model": "gpt-4o-mini"
260
  })
261
 
262
  @app.route("/api/train", methods=["POST"])
 
267
  data = request.get_json()
268
  user_input = data.get("user_input", "").strip()
269
  ai_response = data.get("ai_response", "").strip()
270
+ model_used = data.get("model_used", "gpt-4o-mini")
271
 
272
  if not user_input or not ai_response:
273
  return jsonify({"error": "Missing user_input or ai_response"}), 400
274
 
 
275
  training_entry = {
276
  "input": user_input,
277
  "output": ai_response,
 
280
  "training_id": str(time.time()),
281
  "learning_score": 1.0,
282
  "is_new_learning": True,
283
+ "auto_generated": False,
284
  "context": {
285
  "query_length": len(user_input),
286
  "response_length": len(ai_response),
287
+ "training_mode": "manual_openai_gpt4o_mini",
288
+ "source": "user_interaction"
289
  }
290
  }
291
 
 
292
  CONVERSATION_BUFFER.append(training_entry)
293
 
 
294
  if len(CONVERSATION_BUFFER) >= MAX_BUFFER_SIZE:
295
  save_to_training_dataset(CONVERSATION_BUFFER.copy())
296
  CONVERSATION_BUFFER.clear()
 
307
  try:
308
  training_data = load_training_dataset()
309
 
 
310
  total_examples = len(training_data)
311
  total_tokens = sum(len(d.get("input", "")) + len(d.get("output", "")) for d in training_data)
312
  models_used = {}
313
+ auto_generated = sum(1 for d in training_data if d.get("auto_generated", False))
314
 
315
  for example in training_data:
316
  model = example.get("model_used", "unknown")
 
322
  "total_tokens": total_tokens,
323
  "models_used": models_used,
324
  "buffered": len(CONVERSATION_BUFFER),
325
+ "auto_generated": auto_generated,
326
+ "manual_generated": total_examples - auto_generated,
327
  "dataset_url": f"https://huggingface.co/datasets/{TRAINING_DATASET}"
328
  })
329
  except Exception as e:
 
350
  return jsonify({"error": "Failed to flush buffer"}), 500
351
 
352
  if __name__ == "__main__":
353
+ port = int(os.getenv("PORT", 5000))
354
 
355
  print("πŸš€ Starting ContinuumLearner Training Server...")
356
  print(f"πŸ“Š Training Dataset: {TRAINING_DATASET}")
357
  print(f"πŸŽ“ Dataset URL: https://huggingface.co/datasets/{TRAINING_DATASET}")
358
  print("")
359
+ print("πŸ€– Training Mode: OpenAI GPT-4o-mini with AUTO-TRAINING")
360
+ print(" - Automatic training enabled")
361
+ print(f" - Training interval: {AUTO_TRAINING_INTERVAL} seconds")
362
+ print(f" - Prompts per session: {TRAINING_PROMPTS_PER_SESSION}")
363
+ print(" - Very low credit usage per request")
364
  print(" - Responses are saved as training data")
365
+ print(" - ContinuumGPT learns continuously")
366
  print("")
367
 
368
  if OPENAI_API_KEY:
369
+ print("βœ… OpenAI API Key Configured")
370
+ # Start auto-training worker in background thread
371
+ training_thread = threading.Thread(target=auto_train_worker, daemon=True)
372
+ training_thread.start()
373
+ print("βœ… Auto-training worker started")
374
  else:
375
+ print("⚠️ OpenAI API Key Missing - Add OPENAI_API_KEY to enable")
376
+ print(" - Auto-training will be paused until API key is added")
377
 
378
  if HF_TOKEN:
379
  print("βœ… HuggingFace Integration Active")
 
381
  print(f"πŸ“š Current dataset size: {len(training_data)} training examples")
382
  else:
383
  print("⚠️ HuggingFace Integration Disabled - Add HF_TOKEN to enable")
384
+ print(" - Training data will be stored in buffer only")
 
385
 
386
+ print("")
387
+ app.run(host="0.0.0.0", port=port, debug=False, threaded=True)