Spaces:

Sachin5112
/

continuumlearner

Sleeping

App Files Files Community

Sahil committed on Oct 30, 2025

Commit

9b276f2

verified ·

1 Parent(s): 1800769

Update app.py

Browse files

Files changed (1) hide show

app.py +199 -251

app.py CHANGED Viewed

@@ -1,4 +1,3 @@
 import os
 import json
 import time
@@ -8,7 +7,6 @@ from flask import Flask, request, jsonify, send_from_directory
 from flask_cors import CORS
 from datasets import load_dataset, Dataset
 from openai import OpenAI
-import random
 app = Flask(__name__, static_folder=".", static_url_path="")
 CORS(app, supports_credentials=True)
@@ -20,57 +18,15 @@ client = OpenAI(api_key=OPENAI_API_KEY) if OPENAI_API_KEY else None
 # HuggingFace Configuration
 HF_TOKEN = os.getenv("HF_TOKEN")
 TRAINING_DATASET = "Sahil5112/ContinuumGPT"
-CONVERSATION_BUFFER = []
-MAX_BUFFER_SIZE = 10
-# Auto-training configuration
-AUTO_TRAINING_ENABLED = True
-AUTO_TRAINING_INTERVAL = 300  # 5 minutes between auto-training sessions
-TRAINING_PROMPTS_PER_SESSION = 3  # Number of prompts to generate per session
-# Diverse training prompt templates
-TRAINING_TEMPLATES = [
-    # Questions
-    "What is {topic}?",
-    "How does {topic} work?",
-    "Explain {topic} in simple terms",
-    "What are the benefits of {topic}?",
-    "What are common mistakes with {topic}?",
-    # Technical
-    "Write a Python function to {task}",
-    "How do I implement {feature} in JavaScript?",
-    "Debug this code: {code_snippet}",
-    "Best practices for {topic}",
-    "Compare {concept1} vs {concept2}",
-    # Creative
-    "Write a short story about {topic}",
-    "Create a poem about {theme}",
-    "Describe {object} creatively",
-    # Analysis
-    "Analyze the pros and cons of {topic}",
-    "What trends are emerging in {field}?",
-    "Predict the future of {technology}",
-    # Practical
-    "Give me tips for {activity}",
-    "How can I improve my {skill}?",
-    "What should I know about {subject}?"
-]
-TRAINING_TOPICS = [
-    "machine learning", "web development", "Python programming", "data science",
-    "artificial intelligence", "cloud computing", "cybersecurity", "blockchain",
-    "mobile apps", "APIs", "databases", "React", "Node.js", "algorithms",
-    "leadership", "productivity", "time management", "communication",
-    "creativity", "problem solving", "critical thinking", "decision making",
-    "space exploration", "climate change", "renewable energy", "quantum computing",
-    "biotechnology", "robotics", "virtual reality", "augmented reality",
-    "healthy eating", "exercise", "meditation", "stress management",
-    "financial planning", "investing", "entrepreneurship", "marketing"
-]
 def load_training_dataset():
     """Load existing training data from HuggingFace"""
@@ -96,8 +52,8 @@ def save_to_training_dataset(training_examples):
         existing_data.extend(training_examples)
         dataset = Dataset.from_list(existing_data)
         dataset.push_to_hub(TRAINING_DATASET, token=HF_TOKEN, private=False)
-        print(f"✅ Saved {len(training_examples)} training examples to {TRAINING_DATASET}")
-        print(f"📊 Total dataset size: {len(existing_data)} examples")
         return True
     except Exception as e:
         print(f"❌ Error saving to dataset: {e}")
@@ -106,224 +62,206 @@ def save_to_training_dataset(training_examples):
 def call_openai_gpt4o_mini(prompt):
     """Call OpenAI GPT-4o-mini"""
     if not client:
-        return {
-            "success": False,
-            "error": "OPENAI_API_KEY not set. Please add your OpenAI API key to enable AI model training.",
-            "response": None
-        }
     try:
         response = client.chat.completions.create(
             model="gpt-4o-mini",
             messages=[
-                {"role": "system", "content": "You are a helpful AI assistant providing training data for ContinuumGPT."},
                 {"role": "user", "content": prompt}
             ],
             temperature=0.7,
             max_tokens=1000
         )
-        generated_text = response.choices[0].message.content
-        return {
-            "success": True,
-            "error": None,
-            "response": generated_text
-        }
-    except Exception as e:
-        return {
-            "success": False,
-            "error": f"OpenAI API Error: {str(e)}",
-            "response": None
         }
-def generate_training_prompt():
-    """Generate a diverse training prompt"""
-    template = random.choice(TRAINING_TEMPLATES)
-    if "{topic}" in template:
-        topic = random.choice(TRAINING_TOPICS)
-        return template.replace("{topic}", topic)
-    elif "{task}" in template:
-        tasks = ["sort a list", "reverse a string", "calculate factorial", "find prime numbers", "merge arrays"]
-        return template.replace("{task}", random.choice(tasks))
-    elif "{feature}" in template:
-        features = ["authentication", "form validation", "API calls", "state management", "routing"]
-        return template.replace("{feature}", random.choice(features))
-    elif "{theme}" in template:
-        themes = ["technology", "nature", "future", "dreams", "adventure"]
-        return template.replace("{theme}", random.choice(themes))
-    elif "{field}" in template:
-        fields = ["AI", "software development", "healthcare", "education", "finance"]
-        return template.replace("{field}", random.choice(fields))
-    elif "{concept1}" in template and "{concept2}" in template:
-        pairs = [("SQL", "NoSQL"), ("REST", "GraphQL"), ("React", "Vue"), ("Python", "JavaScript")]
-        pair = random.choice(pairs)
-        return template.replace("{concept1}", pair[0]).replace("{concept2}", pair[1])
-    else:
-        return template
-def auto_train_worker():
-    """Background worker that continuously trains the model"""
-    print("🤖 Auto-training worker started")
-    while AUTO_TRAINING_ENABLED:
         try:
-            if not client:
-                print("⚠️ Auto-training paused - OPENAI_API_KEY not set")
-                time.sleep(AUTO_TRAINING_INTERVAL)
-                continue
-            print(f"\n🎓 Starting auto-training session ({TRAINING_PROMPTS_PER_SESSION} prompts)...")
-            session_examples = []
-            for i in range(TRAINING_PROMPTS_PER_SESSION):
-                # Generate diverse prompt
-                prompt = generate_training_prompt()
-                print(f"  📝 Prompt {i+1}: {prompt[:60]}...")
-                # Get AI response
-                result = call_openai_gpt4o_mini(prompt)
-                if result["success"]:
-                    # Create training entry
-                    training_entry = {
-                        "input": prompt,
-                        "output": result["response"],
-                        "model_used": "gpt-4o-mini",
-                        "timestamp": datetime.now().isoformat(),
-                        "training_id": str(time.time()),
-                        "learning_score": 1.0,
-                        "is_new_learning": True,
-                        "auto_generated": True,
-                        "context": {
-                            "query_length": len(prompt),
-                            "response_length": len(result["response"]),
-                            "training_mode": "auto_openai_gpt4o_mini",
-                            "source": "auto_training"
-                        }
-                    }
-                    session_examples.append(training_entry)
-                    CONVERSATION_BUFFER.append(training_entry)
-                    print(f"  ✅ Response generated ({len(result['response'])} chars)")
-                else:
-                    print(f"  ❌ Error: {result['error']}")
-                # Small delay between requests to avoid rate limits
-                time.sleep(2)
-            # Save to HuggingFace if buffer is full or session complete
-            if len(CONVERSATION_BUFFER) >= MAX_BUFFER_SIZE:
-                print(f"\n💾 Auto-saving {len(CONVERSATION_BUFFER)} examples to HuggingFace...")
-                if save_to_training_dataset(CONVERSATION_BUFFER.copy()):
-                    CONVERSATION_BUFFER.clear()
-                    print("✅ Auto-save successful")
-                else:
-                    print("❌ Auto-save failed (will retry next session)")
-            dataset_size = len(load_training_dataset())
-            print(f"📊 Dataset now contains {dataset_size} total examples")
-            print(f"⏳ Next auto-training session in {AUTO_TRAINING_INTERVAL}s...\n")
         except Exception as e:
-            print(f"❌ Auto-training error: {e}")
-        time.sleep(AUTO_TRAINING_INTERVAL)
 @app.route("/")
 def index():
     return send_from_directory(".", "index.html")
-@app.route("/api/generate", methods=["POST"])
-def generate_response():
-    """Generate AI response using OpenAI GPT-4o-mini"""
     data = request.get_json()
-    prompt = data.get("prompt", "").strip()
-    if not prompt:
-        return jsonify({"success": False, "error": "Missing prompt"}), 400
-    result = call_openai_gpt4o_mini(prompt)
-    if result["success"]:
-        return jsonify({
-            "success": True,
-            "response": result["response"],
-            "model": "gpt-4o-mini"
-        })
-    else:
         return jsonify({
             "success": False,
-            "error": result["error"],
-            "model": "gpt-4o-mini"
         })
-@app.route("/api/train", methods=["POST"])
-def train_model():
-    """Process AI model response and save as training data"""
-    global CONVERSATION_BUFFER
-    data = request.get_json()
-    user_input = data.get("user_input", "").strip()
-    ai_response = data.get("ai_response", "").strip()
-    model_used = data.get("model_used", "gpt-4o-mini")
-    if not user_input or not ai_response:
-        return jsonify({"error": "Missing user_input or ai_response"}), 400
-    training_entry = {
-        "input": user_input,
-        "output": ai_response,
-        "model_used": model_used,
-        "timestamp": datetime.now().isoformat(),
-        "training_id": str(time.time()),
-        "learning_score": 1.0,
-        "is_new_learning": True,
-        "auto_generated": False,
-        "context": {
-            "query_length": len(user_input),
-            "response_length": len(ai_response),
-            "training_mode": "manual_openai_gpt4o_mini",
-            "source": "user_interaction"
-        }
     }
-    CONVERSATION_BUFFER.append(training_entry)
-    if len(CONVERSATION_BUFFER) >= MAX_BUFFER_SIZE:
-        save_to_training_dataset(CONVERSATION_BUFFER.copy())
-        CONVERSATION_BUFFER.clear()
     return jsonify({
         "success": True,
-        "buffered": len(CONVERSATION_BUFFER),
-        "message": f"Training example buffered ({len(CONVERSATION_BUFFER)}/{MAX_BUFFER_SIZE})"
     })
 @app.route("/api/dataset-stats", methods=["GET"])
 def dataset_stats():
-    """Get statistics about the training dataset"""
     try:
         training_data = load_training_dataset()
-        total_examples = len(training_data)
-        total_tokens = sum(len(d.get("input", "")) + len(d.get("output", "")) for d in training_data)
-        models_used = {}
-        auto_generated = sum(1 for d in training_data if d.get("auto_generated", False))
-        for example in training_data:
-            model = example.get("model_used", "unknown")
-            models_used[model] = models_used.get(model, 0) + 1
         return jsonify({
             "success": True,
-            "total_examples": total_examples,
-            "total_tokens": total_tokens,
-            "models_used": models_used,
-            "buffered": len(CONVERSATION_BUFFER),
-            "auto_generated": auto_generated,
-            "manual_generated": total_examples - auto_generated,
             "dataset_url": f"https://huggingface.co/datasets/{TRAINING_DATASET}"
         })
     except Exception as e:
@@ -331,57 +269,67 @@ def dataset_stats():
 @app.route("/api/flush-buffer", methods=["POST"])
 def flush_buffer():
-    """Manually flush the training buffer to HuggingFace"""
-    global CONVERSATION_BUFFER
-    if not CONVERSATION_BUFFER:
         return jsonify({"message": "Buffer is empty, nothing to flush"})
-    success = save_to_training_dataset(CONVERSATION_BUFFER.copy())
-    count = len(CONVERSATION_BUFFER)
-    CONVERSATION_BUFFER.clear()
     if success:
         return jsonify({
             "success": True,
-            "message": f"Flushed {count} training examples to HuggingFace"
         })
     else:
         return jsonify({"error": "Failed to flush buffer"}), 500
 if __name__ == "__main__":
-    port = int(os.getenv("PORT", 7860))
-    print("🚀 Starting ContinuumLearner Training Server...")
     print(f"📊 Training Dataset: {TRAINING_DATASET}")
     print(f"🎓 Dataset URL: https://huggingface.co/datasets/{TRAINING_DATASET}")
     print("")
-    print("🤖 Training Mode: OpenAI GPT-4o-mini with AUTO-TRAINING")
-    print("   - Automatic training enabled")
-    print(f"   - Training interval: {AUTO_TRAINING_INTERVAL} seconds")
-    print(f"   - Prompts per session: {TRAINING_PROMPTS_PER_SESSION}")
-    print("   - Very low credit usage per request")
-    print("   - Responses are saved as training data")
-    print("   - ContinuumGPT learns continuously")
     print("")
     if OPENAI_API_KEY:
         print("✅ OpenAI API Key Configured")
-        # Start auto-training worker in background thread
-        training_thread = threading.Thread(target=auto_train_worker, daemon=True)
-        training_thread.start()
-        print("✅ Auto-training worker started")
     else:
         print("⚠️ OpenAI API Key Missing - Add OPENAI_API_KEY to enable")
-        print("   - Auto-training will be paused until API key is added")
     if HF_TOKEN:
         print("✅ HuggingFace Integration Active")
         training_data = load_training_dataset()
-        print(f"📚 Current dataset size: {len(training_data)} training examples")
     else:
         print("⚠️ HuggingFace Integration Disabled - Add HF_TOKEN to enable")
-        print("   - Training data will be stored in buffer only")
     print("")
-    app.run(host="0.0.0.0", port=port, debug=False, threaded=True)

 import os
 import json
 import time
 from flask_cors import CORS
 from datasets import load_dataset, Dataset
 from openai import OpenAI
 app = Flask(__name__, static_folder=".", static_url_path="")
 CORS(app, supports_credentials=True)
 # HuggingFace Configuration
 HF_TOKEN = os.getenv("HF_TOKEN")
 TRAINING_DATASET = "Sahil5112/ContinuumGPT"
+# Hierarchical Memory Configuration
+LEVEL_1_MAX = 20  # Max entries before archiving to Level 2
+LEVEL_2_MAX = 50  # Max entries before archiving to Level 3
+AUTO_ARCHIVE_ENABLED = True
+AUTO_ARCHIVE_INTERVAL = 600  # 10 minutes
+# Memory buffer
+MEMORY_BUFFER = []
 def load_training_dataset():
     """Load existing training data from HuggingFace"""
         existing_data.extend(training_examples)
         dataset = Dataset.from_list(existing_data)
         dataset.push_to_hub(TRAINING_DATASET, token=HF_TOKEN, private=False)
+        print(f"✅ Saved {len(training_examples)} entries to {TRAINING_DATASET}")
+        print(f"📊 Total dataset size: {len(existing_data)} entries")
         return True
     except Exception as e:
         print(f"❌ Error saving to dataset: {e}")
 def call_openai_gpt4o_mini(prompt):
     """Call OpenAI GPT-4o-mini"""
     if not client:
+        return None
     try:
         response = client.chat.completions.create(
             model="gpt-4o-mini",
             messages=[
+                {"role": "system", "content": "You are ContinuumGPT, a helpful AI assistant."},
                 {"role": "user", "content": prompt}
             ],
             temperature=0.7,
             max_tokens=1000
         )
+        return response.choices[0].message.content
+    except Exception as e:
+        print(f"OpenAI API Error: {e}")
+        return None
def summarize_conversation(query, response):
    """Produce a one-sentence summary of a single query/response exchange.

    Args:
        query: The user's message.
        response: The assistant's reply to that message.

    Returns:
        The model-written summary when an OpenAI client is configured and
        the call yields text. Otherwise a fallback string built from the
        first characters of the query (and, when no client exists at all,
        of the response as well).
    """
    if not client:
        # No AI available: stitch together the first 50 characters of each side.
        return f"{query[:50]}... -> {response[:50]}..."

    fallback = f"Q&A about {query[:30]}..."
    try:
        summary_prompt = (
            f"Summarize this conversation in one sentence:\nUser: {query}\nAI: {response}"
        )
        summary = call_openai_gpt4o_mini(summary_prompt)
    except Exception as e:
        # Summaries are best-effort, so fall back instead of failing the chat
        # request -- but never swallow KeyboardInterrupt/SystemExit, and leave
        # a trace of what went wrong.
        print(f"⚠️ Summary generation failed: {e}")
        return fallback
    return summary if summary else fallback
def create_super_summary(summaries):
    """Condense several conversation summaries into one global summary.

    Args:
        summaries: List of summary strings, assumed to be ordered oldest
            first (TODO confirm against the dataset ordering).

    Returns:
        The model-written super-summary, or the placeholder
        "General knowledge compilation" when no OpenAI client is
        configured, ``summaries`` is empty, or the model call yields
        nothing.
    """
    placeholder = "General knowledge compilation"
    if not client or not summaries:
        return placeholder
    try:
        # Use the last 10 summaries. The previous slice ([:10]) took the
        # first 10, contradicting the documented intent.
        combined = "\n".join(summaries[-10:])
        prompt = f"Create a brief summary of these conversation topics:\n{combined}"
        super_summary = call_openai_gpt4o_mini(prompt)
    except Exception as e:
        # Best-effort: report and fall back, without trapping
        # KeyboardInterrupt/SystemExit the way a bare ``except:`` would.
        print(f"⚠️ Super-summary generation failed: {e}")
        return placeholder
    return super_summary if super_summary else placeholder
def archive_level_1_to_level_2():
    """Promote all fresh Level 1 entries to Level 2 once they exceed LEVEL_1_MAX.

    Loads the stored dataset and, when more than LEVEL_1_MAX unarchived
    Level 1 rows exist, rewrites every one of them as an archived Level 2
    row (counter incremented, original timestamp preserved). The rewritten
    dataset is pushed to the hub only when HF_TOKEN is set.

    Returns:
        None. Does nothing when the Level 1 count is at or below
        LEVEL_1_MAX.
    """
    dataset = load_training_dataset()
    level_1_entries = [
        d for d in dataset if d.get("level") == 1 and not d.get("archived")
    ]
    if len(level_1_entries) <= LEVEL_1_MAX:
        return

    print(f"📦 Archiving {len(level_1_entries)} Level 1 entries to Level 2...")
    archived_entries = []
    for entry in level_1_entries:
        # A stored row may carry an explicit counter=None (e.g. a column that
        # was missing when the row was written); .get's default does not
        # cover that case, so normalise it before incrementing.
        previous_counter = entry.get("counter")
        if previous_counter is None:
            previous_counter = 1
        archived_entries.append({
            "query": entry.get("query"),
            "response": entry.get("response"),
            "summary": entry.get("summary"),
            "archived": True,
            "level": 2,
            "counter": previous_counter + 1,
            "timestamp": datetime.now().isoformat(),
            "original_timestamp": entry.get("timestamp"),
        })

    # Drop the rows that were just promoted and append their Level 2 versions.
    updated_dataset = [d for d in dataset if d.get("level") != 1 or d.get("archived")]
    updated_dataset.extend(archived_entries)

    if HF_TOKEN:
        # Dataset.from_list takes its column set from the first row, so pad
        # every row to the union of keys; otherwise fields that only the new
        # rows have (original_timestamp) would be silently dropped.
        columns = list(dict.fromkeys(key for row in updated_dataset for key in row))
        rows = [{col: row.get(col) for col in columns} for row in updated_dataset]
        dataset_obj = Dataset.from_list(rows)
        dataset_obj.push_to_hub(TRAINING_DATASET, token=HF_TOKEN, private=False)
        print(f"✅ Archived to Level 2: {len(archived_entries)} entries")
def archive_level_2_to_level_3():
    """Collapse all Level 2 entries into one Level 3 super-summary row.

    Loads the stored dataset and, when more than LEVEL_2_MAX Level 2 rows
    exist, builds a super-summary from their summaries, removes every
    Level 2 row and appends a single Level 3 row in their place. The
    rewritten dataset is pushed to the hub only when HF_TOKEN is set.

    Returns:
        None. Does nothing when the Level 2 count is at or below
        LEVEL_2_MAX, or when none of the Level 2 rows has a summary.
    """
    dataset = load_training_dataset()
    level_2_entries = [d for d in dataset if d.get("level") == 2]
    if len(level_2_entries) <= LEVEL_2_MAX:
        return

    summaries = [d.get("summary", "") for d in level_2_entries if d.get("summary")]
    if not summaries:
        # With nothing to summarise, the Level 2 rows would be replaced by a
        # bare placeholder string -- keep them rather than destroy the data.
        print("⚠️ Level 2 entries have no summaries - skipping Level 3 archive")
        return

    print(f"🗜️ Creating Level 3 super-summary from {len(level_2_entries)} Level 2 entries...")
    super_summary = create_super_summary(summaries)
    level_3_entry = {
        "query": "Global Knowledge Archive",
        "response": super_summary,
        "summary": super_summary,
        "archived": True,
        "level": 3,
        "counter": len(level_2_entries),
        "timestamp": datetime.now().isoformat(),
        "entries_compressed": len(level_2_entries),
    }

    # Every Level 2 row is removed (none are retained); all other rows are
    # kept and the new Level 3 row is appended.
    updated_dataset = [d for d in dataset if d.get("level") != 2]
    updated_dataset.append(level_3_entry)

    if HF_TOKEN:
        # Dataset.from_list takes its column set from the first row, so pad
        # every row to the union of keys; otherwise fields that only the new
        # row has (entries_compressed) would be silently dropped.
        columns = list(dict.fromkeys(key for row in updated_dataset for key in row))
        rows = [{col: row.get(col) for col in columns} for row in updated_dataset]
        dataset_obj = Dataset.from_list(rows)
        dataset_obj.push_to_hub(TRAINING_DATASET, token=HF_TOKEN, private=False)
        print("✅ Created Level 3 super-summary")
def auto_archive_worker():
    """Loop for as long as AUTO_ARCHIVE_ENABLED is truthy, archiving periodically.

    Each pass runs both archive steps when HF_TOKEN and the OpenAI client
    are both available, otherwise prints a "paused" notice. Any exception
    raised during a pass is printed and the loop continues. The worker
    sleeps AUTO_ARCHIVE_INTERVAL seconds after every pass.
    """
    print("🗄️ Auto-archive worker started")
    while AUTO_ARCHIVE_ENABLED:
        try:
            if not (HF_TOKEN and client):
                print("⚠️ Auto-archive paused - need HF_TOKEN and OPENAI_API_KEY")
            else:
                print("\n🔄 Running auto-archive check...")
                archive_level_1_to_level_2()
                archive_level_2_to_level_3()
                print(f"⏳ Next archive check in {AUTO_ARCHIVE_INTERVAL}s...\n")
        except Exception as exc:
            # One failed pass must not stop the worker.
            print(f"❌ Auto-archive error: {exc}")
        time.sleep(AUTO_ARCHIVE_INTERVAL)
 @app.route("/")
 def index():
     return send_from_directory(".", "index.html")
@app.route("/api/chat", methods=["POST"])
def chat():
    """Answer a chat query and record the exchange as a Level 1 memory entry.

    Expects a JSON object body with a non-empty string "query". The query is
    sent to the model, the exchange is summarised and appended to
    MEMORY_BUFFER, and the buffer is saved to the dataset (then emptied on
    success) once it holds at least five entries.

    Returns:
        A JSON response: 400 with an error when the query is missing or not
        a string; ``success: False`` with an error message when no model
        response could be produced; otherwise the response, its summary,
        the memory level and the current buffer size.
    """
    flush_threshold = 5  # buffered entries that trigger a save to the dataset

    # silent=True yields None instead of raising on a missing or malformed
    # JSON body, and a JSON body need not be an object -- treat both as
    # "no query".
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    raw_query = data.get("query", "")
    query = raw_query.strip() if isinstance(raw_query, str) else ""
    if not query:
        return jsonify({"success": False, "error": "Missing query"}), 400

    # Generate response
    response = call_openai_gpt4o_mini(query)
    if not response:
        # Only blame the API key when it is actually absent; with a client
        # configured, the failure came from the model call itself.
        if client:
            error = "The AI model did not return a response. Please try again."
        else:
            error = "OPENAI_API_KEY not set. Please add your OpenAI API key to enable AI."
        return jsonify({
            "success": False,
            "error": error,
            "response": None
        })

    # Create summary
    summary = summarize_conversation(query, response)

    # Create Level 1 memory entry
    memory_entry = {
        "query": query,
        "response": response,
        "summary": summary,
        "archived": False,
        "level": 1,
        "counter": 1,
        "timestamp": datetime.now().isoformat()
    }
    MEMORY_BUFFER.append(memory_entry)

    # Save to HuggingFace if we have enough entries; keep them buffered for a
    # later retry when the save fails.
    if len(MEMORY_BUFFER) >= flush_threshold:
        if save_to_training_dataset(MEMORY_BUFFER.copy()):
            MEMORY_BUFFER.clear()

    return jsonify({
        "success": True,
        "response": response,
        "summary": summary,
        "level": 1,
        "buffered": len(MEMORY_BUFFER)
    })
 @app.route("/api/dataset-stats", methods=["GET"])
 def dataset_stats():
+    """Get statistics about the hierarchical memory dataset"""
     try:
         training_data = load_training_dataset()
+        level_1 = [d for d in training_data if d.get("level") == 1]
+        level_2 = [d for d in training_data if d.get("level") == 2]
+        level_3 = [d for d in training_data if d.get("level") == 3]
         return jsonify({
             "success": True,
+            "total_entries": len(training_data),
+            "level_1_fresh": len([d for d in level_1 if not d.get("archived")]),
+            "level_2_archived": len(level_2),
+            "level_3_super": len(level_3),
+            "buffered": len(MEMORY_BUFFER),
             "dataset_url": f"https://huggingface.co/datasets/{TRAINING_DATASET}"
         })
     except Exception as e:
 @app.route("/api/flush-buffer", methods=["POST"])
 def flush_buffer():
+    """Manually flush the memory buffer to HuggingFace"""
+    global MEMORY_BUFFER
+    if not MEMORY_BUFFER:
         return jsonify({"message": "Buffer is empty, nothing to flush"})
+    success = save_to_training_dataset(MEMORY_BUFFER.copy())
+    count = len(MEMORY_BUFFER)
+    MEMORY_BUFFER.clear()
     if success:
         return jsonify({
             "success": True,
+            "message": f"Flushed {count} entries to HuggingFace"
         })
     else:
         return jsonify({"error": "Failed to flush buffer"}), 500
@app.route("/api/archive-now", methods=["POST"])
def archive_now():
    """Run both archive steps immediately, in order.

    Returns:
        A JSON success message, or a 500 response carrying the exception
        text when either step raises.
    """
    archive_steps = (archive_level_1_to_level_2, archive_level_2_to_level_3)
    try:
        for run_step in archive_steps:
            run_step()
    except Exception as exc:
        return jsonify({"error": str(exc)}), 500
    return jsonify({"success": True, "message": "Archiving completed"})
 if __name__ == "__main__":
+    port = int(os.getenv("PORT", 5000))
+    print("🚀 Starting ContinuumGPT Hierarchical Memory System...")
     print(f"📊 Training Dataset: {TRAINING_DATASET}")
     print(f"🎓 Dataset URL: https://huggingface.co/datasets/{TRAINING_DATASET}")
     print("")
+    print("🧠 Hierarchical Memory Architecture:")
+    print(f"   Level 1 (Fresh): Detailed Q&A (max {LEVEL_1_MAX} before archiving)")
+    print(f"   Level 2 (Archived): Compressed summaries (max {LEVEL_2_MAX} before archiving)")
+    print(f"   Level 3 (Super): Global knowledge compilation")
+    print(f"   Auto-archiving: Every {AUTO_ARCHIVE_INTERVAL} seconds")
     print("")
     if OPENAI_API_KEY:
         print("✅ OpenAI API Key Configured")
+        # Start auto-archive worker
+        archive_thread = threading.Thread(target=auto_archive_worker, daemon=True)
+        archive_thread.start()
+        print("✅ Auto-archive worker started")
     else:
         print("⚠️ OpenAI API Key Missing - Add OPENAI_API_KEY to enable")
     if HF_TOKEN:
         print("✅ HuggingFace Integration Active")
         training_data = load_training_dataset()
+        level_counts = {1: 0, 2: 0, 3: 0}
+        for d in training_data:
+            level = d.get("level", 1)
+            level_counts[level] = level_counts.get(level, 0) + 1
+        print(f"📚 Current dataset: L1={level_counts[1]}, L2={level_counts[2]}, L3={level_counts[3]}")
     else:
         print("⚠️ HuggingFace Integration Disabled - Add HF_TOKEN to enable")
     print("")
+    app.run(host="0.0.0.0", port=port, debug=False, threaded=True)