Update app.py

app.py CHANGED
@@ -1,54 +1,72 @@
-"""Gradio

 import gradio as gr
 import os
 import json
-from datetime import datetime

 from model import VedaProgrammingLLM
 from tokenizer import VedaTokenizer
-from data_collector import collector
-from continuous_trainer import trainer
 from database import db
-from train import VedaTrainer
-from config import
-MODEL_DIR, DEFAULT_TEMPERATURE, DEFAULT_MAX_TOKENS,
-DEFAULT_REPETITION_PENALTY, DEFAULT_TOP_K
-)

-#
-

 def initialize():
-"""Initialize the
-
-print("=" * 50)

-
-
-
-
-
-
-
-
-
-
-
 )
-initial_trainer.train(epochs=10, save_path=MODEL_DIR)

-
-

-#
-

-
-print("✅ System ready!")
-
-def clean_output(text: str) -> str:
-"""Clean generated output"""
 lines = text.split('\n')
 cleaned = []
 empty_count = 0
@@ -62,410 +80,209 @@ def clean_output(text: str) -> str:
 empty_count = 0
 cleaned.append(line)

-return '\n'.join(cleaned)

-def
-
-
-
-repetition_penalty: float,
-top_k: int
-) -> tuple:
-"""Generate code and track interaction"""
-global current_interaction_id

-if
-return "⏳ Model loading..."

 try:
-
-

-#
-
-prompt=prompt,
-max_tokens=int(max_tokens),
-temperature=float(temperature),
-repetition_penalty=float(repetition_penalty),
-top_k=int(top_k)
-)

-

-#
-
-
-
 temperature=temperature,
-
 )

-

 except Exception as e:
 import traceback
 traceback.print_exc()
-return f"❌ Error: {str(e)}"

-def
-"""
-
-
-
-collector.record_feedback(
-interaction_id=interaction_id,
-is_positive=is_positive,
-edited_code=edited_code if edited_code and edited_code.strip() else None
-)
-
-emoji = "👍" if is_positive else "👎"
-pending = collector.get_pending_count()
-
-msg = f"{emoji} Feedback recorded! Thank you for helping improve the model.\n"
-msg += f"π Approved samples pending training: {pending}"
-
-if trainer.should_retrain():
-msg += "\nπ Enough samples collected - model will be retrained soon!"
-
-return msg

-def
-

-def
-

-def
-
-
-
-
-result = trainer.train(epochs=int(epochs))
-
-if result['status'] == 'success':
-return f"""✅ Training Complete!

-
-
-
-- Accuracy: {result['accuracy']:.4f}
-- Samples Used: {result['samples_used']}
-"""
-else:
-return f"❌ Training Error: {result['message']}"
-
-def add_training_code(code: str, category: str):
-"""Add code directly to training data"""
-if not code.strip():
-return "⚠️ Please enter some code"

-
-return f"✅ Code added to training data!\nCategory: {category}"
-
-def get_statistics():
-"""Get system statistics"""
-stats = collector.get_statistics()
-status = trainer.get_status()

-
-
-### Model Status
-| Property | Value |
-|----------|-------|
-| π€ Model Version | {status['model_version']} |
-| π Currently Training | {'Yes' if status['is_training'] else 'No'} |
-| π Training Progress | {status['training_progress']:.0f}% |
-| ⏰ Last Training | {status['last_training'] or 'Never'} |
-
-### Learning Data
-| Metric | Count |
-|--------|-------|
-| 💬 Total Interactions | {stats['total_interactions']} |
-| 👍 Positive Feedback | {stats['positive_feedback']} |
-| 👎 Negative Feedback | {stats['negative_feedback']} |
-| ✅ Approved Samples | {stats['approved_samples']} |
-| π Pending for Training | {status['pending_samples']} |
-| 🎯 Min Samples to Retrain | {status['min_samples_for_training']} |
-
-### Training History
-| Metric | Value |
-|--------|-------|
-| π Total Training Runs | {stats['training_runs']} |
-| π Code Samples | {stats['code_samples']} |
-
-### Last 7 Days
-| Metric | Count |
-|--------|-------|
-| π’ Generations | {stats['recent_generations']} |
-| 👍 Positive | {stats['recent_positive']} |
-| 👎 Negative | {stats['recent_negative']} |
-| π Approval Rate | {stats['approval_rate']:.1f}% |
-"""
-
-def get_recent_interactions():
-"""Get recent interactions for review"""
-interactions = db.get_recent_interactions(limit=10)

-
-

-

-
-
-md += f"""### {item['timestamp']}
-**Prompt:** `{item['prompt'][:50]}...`
-**Feedback:** {feedback}
-
----
-"""

-

-def
-
-
-
-if not history:
-return "No training history yet."
-
-md = "## Training History\n\n"
-md += "| Date | Version | Samples | Loss | Accuracy |\n"
-md += "|------|---------|---------|------|----------|\n"
-
-for item in history:
-md += f"| {item['timestamp'][:10]} | {item['model_version']} | "
-md += f"{item['samples_used']} | {item['final_loss']:.4f} | {item['final_accuracy']:.4f} |\n"
-
-return md

-
-
-
-
-
-config = trainer.model.get_config()
-params = trainer.model.count_params()
-
-return f"""## ποΈ Veda Programming LLM
-
-### Architecture
-
-| Property | Value |
-|----------|-------|
-| π Vocabulary Size | {config['vocab_size']:,} |
-| π Max Sequence Length | {config['max_length']} |
-| π§ Model Dimension | {config['d_model']} |
-| ποΈ Attention Heads | {config['num_heads']} |
-| π¦ Transformer Layers | {config['num_layers']} |
-| π§ FFN Dimension | {config['ff_dim']} |
-| ⚡ **Total Parameters** | **{params:,}** |
-
-### Features
-- ✅ Continuous Learning from User Feedback
-- ✅ Automatic Retraining
-- ✅ Repetition Penalty
-- ✅ Top-K & Top-P Sampling
-- ✅ Temperature Control
-- ✅ Model Versioning
 """

-# Create
 def create_app():
-with gr.Blocks(
-title="Veda Programming LLM",
-theme=gr.themes.Soft(),
-css="""
-.feedback-btn { min-width: 100px; }
-.positive { background-color: #4CAF50 !important; }
-.negative { background-color: #f44336 !important; }
-"""
-) as app:
-
-# Hidden state for interaction tracking
-interaction_id = gr.State(value=-1)

 gr.Markdown("""
-# ποΈ Veda Programming
-### AI Code Generation with Continuous Learning

-
 """)

 with gr.Tabs():
-
-
 with gr.Row():
-
-
-
-
-
-
-
-
-with gr.Row():
-max_tokens = gr.Slider(
-10, 300, value=DEFAULT_MAX_TOKENS,
-step=10, label="π Max Tokens"
-)
-temperature = gr.Slider(
-0.1, 1.5, value=DEFAULT_TEMPERATURE,
-step=0.1, label="🌡️ Temperature"
-)
-
-with gr.Row():
-repetition_penalty = gr.Slider(
-1.0, 2.0, value=DEFAULT_REPETITION_PENALTY,
-step=0.1, label="π Repetition Penalty"
-)
-top_k = gr.Slider(
-10, 100, value=DEFAULT_TOP_K,
-step=5, label="🎯 Top-K"
-)
-
-gen_btn = gr.Button("π Generate Code", variant="primary", size="lg")
-
-with gr.Column(scale=1):
-output = gr.Code(
-label="π Generated Code (Edit if needed before rating)",
-language="python",
-lines=15,
-interactive=True
-)
-
-gr.Markdown("### 👍 Rate this output to help improve the model:")
-
-with gr.Row():
-good_btn = gr.Button("👍 Good", variant="primary", elem_classes=["feedback-btn", "positive"])
-bad_btn = gr.Button("👎 Bad", variant="secondary", elem_classes=["feedback-btn", "negative"])
-
-feedback_output = gr.Textbox(label="Feedback Status", lines=2)

-
-
-
-inputs=[prompt, max_tokens, temperature, repetition_penalty, top_k],
-outputs=[output, interaction_id]
-)

-
-
-
-
-outputs=feedback_output
-)

-
-
-
-
-)

-
-gr.Markdown("### 💡 Example Prompts")
 gr.Examples(
 examples=[
-["
-["
-["
-["
 ],
-inputs=
 )

-#
 with gr.TabItem("π Training"):
-
-
-
-
-
-train_epochs = gr.Slider(1, 20, value=5, step=1, label="Epochs")
-train_btn = gr.Button("🎯 Start Training", variant="primary")
-train_output = gr.Textbox(label="Training Output", lines=8)
-
-train_btn.click(manual_train, inputs=[train_epochs], outputs=train_output)
-
-with gr.Column():
-gr.Markdown("### π Add Training Code")
-gr.Markdown("Contribute code directly to the training dataset.")
-
-code_input = gr.Textbox(
-label="Code",
-placeholder="Paste your Python code here...",
-lines=10
-)
-
-category = gr.Dropdown(
-choices=["function", "class", "algorithm", "utility", "other"],
-value="function",
-label="Category"
-)
-
-add_btn = gr.Button("➕ Add to Training Data")
-add_output = gr.Textbox(label="Status")
-
-add_btn.click(add_training_code, inputs=[code_input, category], outputs=add_output)

-#
-with gr.TabItem("π
-
-
-
-
-gr.Markdown("---")
-
-with gr.Row():
-with gr.Column():
-interactions_output = gr.Markdown()
-refresh_interactions = gr.Button("π Refresh Interactions")
-refresh_interactions.click(get_recent_interactions, outputs=interactions_output)
-
-with gr.Column():
-history_output = gr.Markdown()
-refresh_history = gr.Button("π Refresh History")
-refresh_history.click(get_training_history, outputs=history_output)
-
-# ============ Model Info Tab ============
-with gr.TabItem("ℹ️ Model Info"):
-info_output = gr.Markdown()
-refresh_info = gr.Button("π Refresh Info")
-refresh_info.click(get_model_info, outputs=info_output)
-
-gr.Markdown("""
-### π§ How Continuous Learning Works
-
-1. **You generate code** using the model
-2. **You rate the output** (👍 or 👎)
-3. **Good outputs are saved** for training
-4. **When enough samples collect**, the model retrains
-5. **The model improves** based on your feedback!
-
-### 💡 Tips
-
-- Rate outputs honestly to help the model learn
-- Edit code before rating if it's close but not perfect
-- The more you use it, the better it gets!
-- Contribute your own code samples for faster learning
-""")

-gr.Markdown(""
----
-**ποΈ Veda Programming LLM** | Continuous Learning System |
-Built with TensorFlow & Gradio
-""")

 return app

-# Main
 if __name__ == "__main__":
 initialize()
-
-print("\nπ Starting Gradio Interface...")
 app = create_app()
-app.launch(
-server_name="0.0.0.0",
-server_port=7860,
-show_error=True
-)
+"""Gradio App - REPLACED with chat interface"""

 import gradio as gr
+import tensorflow as tf
 import os
 import json

 from model import VedaProgrammingLLM
 from tokenizer import VedaTokenizer
 from database import db
+from train import VedaTrainer
+from config import MODEL_DIR

+# Global state
+model = None
+tokenizer = None
+conversation_history = []
+current_conv_id = -1

 def initialize():
+    """Initialize the assistant"""
+    global model, tokenizer

+    print("ποΈ Initializing Veda Programming Assistant...")
+
+    config_path = os.path.join(MODEL_DIR, "config.json")
+
+    if os.path.exists(config_path):
+        print("Loading existing model...")
+
+        with open(config_path, 'r') as f:
+            config = json.load(f)
+
+        tokenizer = VedaTokenizer()
+        tokenizer.load(os.path.join(MODEL_DIR, "tokenizer.json"))
+
+        model = VedaProgrammingLLM(
+            vocab_size=config['vocab_size'],
+            max_length=config['max_length'],
+            d_model=config['d_model'],
+            num_heads=config['num_heads'],
+            num_layers=config['num_layers'],
+            ff_dim=config['ff_dim']
         )

+        dummy = tf.zeros((1, config['max_length']), dtype=tf.int32)
+        model(dummy)
+        model.load_weights(os.path.join(MODEL_DIR, "weights.h5"))
+
+        print("✅ Model loaded!")
+    else:
+        print("Training new model (this takes a few minutes)...")
+        trainer = VedaTrainer()
+        trainer.train(epochs=15)
+        model = trainer.model
+        tokenizer = trainer.tokenizer
+        print("✅ Model trained!")
+
+def clean_response(text: str) -> str:
+    """Clean the response"""
+    # Handle code blocks
+    text = text.replace("<CODE>", "\n```python\n")
+    text = text.replace("<ENDCODE>", "\n```\n")

+    # Remove special tokens
+    for token in ["<PAD>", "<UNK>", "<START>", "<END>", "<USER>", "<ASSISTANT>"]:
+        text = text.replace(token, "")

+    # Clean whitespace
     lines = text.split('\n')
     cleaned = []
     empty_count = 0

             empty_count = 0
         cleaned.append(line)

+    return '\n'.join(cleaned).strip()

+def generate_response(user_input: str, temperature: float = 0.7,
+                      max_tokens: int = 200) -> str:
+    """Generate a response"""
+    global current_conv_id

+    if model is None:
+        return "⏳ Model is loading..."
+
+    if not user_input.strip():
+        return "Please type a message!"

 try:
+        # Build context from history (last 3 exchanges)
+        context = ""
+        for msg in conversation_history[-3:]:
+            context += f"<USER> {msg['user']}\n<ASSISTANT> {msg['assistant']}\n"

+        # Add current input
+        prompt = context + f"<USER> {user_input}\n<ASSISTANT>"

+        # Encode
+        tokens = tokenizer.encode(prompt)

+        # Truncate if too long
+        if len(tokens) > model.max_length - max_tokens:
+            tokens = tokens[-(model.max_length - max_tokens):]
+
+        # Generate
+        generated = model.generate(
+            tokens,
+            max_new_tokens=max_tokens,
             temperature=temperature,
+            top_k=50,
+            top_p=0.9,
+            repetition_penalty=1.2
         )

+        # Decode
+        response = tokenizer.decode(generated)
+
+        # Extract assistant's response
+        if "<ASSISTANT>" in response:
+            parts = response.split("<ASSISTANT>")
+            response = parts[-1].strip()
+
+        if "<USER>" in response:
+            response = response.split("<USER>")[0].strip()
+
+        response = clean_response(response)
+
+        # Save to history
+        conversation_history.append({
+            'user': user_input,
+            'assistant': response
+        })
+
+        # Save to database
+        current_conv_id = db.save_conversation(user_input, response)
+
+        return response

 except Exception as e:
 import traceback
 traceback.print_exc()
+        return f"❌ Error: {str(e)}"

+def chat(user_input, history, temperature, max_tokens):
+    """Chat function for Gradio"""
+    response = generate_response(user_input, temperature, max_tokens)
+    history.append((user_input, response))
+    return "", history

+def feedback_good():
+    if current_conv_id > 0:
+        db.update_feedback(current_conv_id, 1)
+        return "👍 Thanks! This helps me improve."
+    return ""

+def feedback_bad():
+    if current_conv_id > 0:
+        db.update_feedback(current_conv_id, -1)
+        return "👎 Thanks for the feedback. I'll try to do better."
+    return ""

+def clear_conversation():
+    global conversation_history
+    conversation_history = []
+    return [], ""

+def retrain(epochs):
+    """Retrain with good conversations"""
+    global model, tokenizer

+    good_convs = db.get_good_conversations()

+    if not good_convs:
+        return "No approved conversations yet. Rate some responses first!"

+    extra_data = ""
+    for conv in good_convs:
+        extra_data += f"<USER> {conv['user_input']}\n"
+        extra_data += f"<ASSISTANT> {conv['assistant_response']}\n\n"

+    trainer = VedaTrainer()
+    history = trainer.train(epochs=int(epochs), extra_data=extra_data)

+    model = trainer.model
+    tokenizer = trainer.tokenizer

+    loss = history.history['loss'][-1]
+    return f"✅ Training done! Loss: {loss:.4f}, Used {len(good_convs)} conversations"

+def get_stats():
+    stats = db.get_stats()
+    return f"""## π Statistics

+| Metric | Count |
+|--------|-------|
+| 💬 Conversations | {stats['total']} |
+| 👍 Positive | {stats['positive']} |
+| 👎 Negative | {stats['negative']} |
 """

+# Create interface
 def create_app():
+    with gr.Blocks(title="Veda Programming Assistant", theme=gr.themes.Soft()) as app:

         gr.Markdown("""
+        # ποΈ Veda Programming Assistant

+        I can **chat**, **write code**, **explain concepts**, and **answer questions**!
         """)

         with gr.Tabs():
+
+            # Chat Tab
+            with gr.TabItem("💬 Chat"):
+                chatbot = gr.Chatbot(label="Conversation", height=400)
+
                 with gr.Row():
+                    msg = gr.Textbox(
+                        label="Your message",
+                        placeholder="Ask me anything about programming...",
+                        lines=2,
+                        scale=4
+                    )
+                    send_btn = gr.Button("Send π€", variant="primary", scale=1)

+                with gr.Row():
+                    temperature = gr.Slider(0.1, 1.5, value=0.7, step=0.1, label="Creativity")
+                    max_tokens = gr.Slider(50, 400, value=200, step=50, label="Response length")

+                with gr.Row():
+                    good_btn = gr.Button("👍 Good", variant="secondary")
+                    bad_btn = gr.Button("👎 Bad", variant="secondary")
+                    clear_btn = gr.Button("ποΈ Clear", variant="secondary")

+                feedback_msg = gr.Textbox(label="", lines=1)
+
+                # Events
+                send_btn.click(chat, [msg, chatbot, temperature, max_tokens], [msg, chatbot])
+                msg.submit(chat, [msg, chatbot, temperature, max_tokens], [msg, chatbot])
+                good_btn.click(feedback_good, outputs=feedback_msg)
+                bad_btn.click(feedback_bad, outputs=feedback_msg)
+                clear_btn.click(clear_conversation, outputs=[chatbot, feedback_msg])

+                gr.Markdown("### 💡 Try these:")
                 gr.Examples(
                     examples=[
+                        ["Hello! What can you do?"],
+                        ["What is Python?"],
+                        ["Write a function to calculate factorial"],
+                        ["Explain what recursion is"],
+                        ["How do I read a file in Python?"],
+                        ["Write a bubble sort algorithm"],
+                        ["What's the difference between list and tuple?"],
                     ],
+                    inputs=msg
                 )

+            # Training Tab
             with gr.TabItem("π Training"):
+                gr.Markdown("### Train on your approved conversations")
+                train_epochs = gr.Slider(5, 20, value=10, step=1, label="Epochs")
+                train_btn = gr.Button("π Retrain", variant="primary")
+                train_output = gr.Markdown()
+                train_btn.click(retrain, [train_epochs], train_output)

+            # Stats Tab
+            with gr.TabItem("π Stats"):
+                stats_out = gr.Markdown()
+                refresh_btn = gr.Button("π Refresh")
+                refresh_btn.click(get_stats, outputs=stats_out)

+        gr.Markdown("---\n**Veda Programming Assistant** | Learning from every conversation!")

     return app

+# Main
 if __name__ == "__main__":
     initialize()
+    print("\nπ Starting...")
     app = create_app()
+    app.launch(server_name="0.0.0.0", server_port=7860)
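
Note: outside the Space, the new chat pipeline can be exercised directly, since app.py keeps its model, tokenizer and conversation history in module-level state. A minimal smoke-test sketch under the assumption that this commit's app.py (plus model.py, tokenizer.py, database.py and config.py from the repository) is importable from the working directory; the test script itself is illustrative and not part of the commit:

# smoke_test.py - illustrative only; assumes app.py from this commit is importable
# and that MODEL_DIR contains a saved model, otherwise initialize() trains one first.
import app

app.initialize()  # loads weights if MODEL_DIR/config.json exists, otherwise trains

# Drive the same code path the Gradio "Chat" tab uses.
for question in ["What is Python?", "Write a function to calculate factorial"]:
    reply = app.generate_response(question, temperature=0.7, max_tokens=200)
    print(f"<USER> {question}\n<ASSISTANT> {reply}\n")

Each call appends to conversation_history, so the second question is generated with the first exchange as context, matching the "last 3 exchanges" window in generate_response.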
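
The new code relies on four calls on database.db: save_conversation, update_feedback, get_good_conversations and get_stats. database.py is not part of this diff, so the sketch below only illustrates one possible shape of that interface, inferred from how app.py uses it; the class name, schema and the SQLite backing store are assumptions, not the actual implementation.

# hypothetical database.py shape, inferred from app.py's usage in this commit
import sqlite3

class ConversationDB:
    def __init__(self, path="conversations.db"):
        self.conn = sqlite3.connect(path, check_same_thread=False)
        self.conn.execute(
            "CREATE TABLE IF NOT EXISTS conversations ("
            "id INTEGER PRIMARY KEY AUTOINCREMENT, "
            "user_input TEXT, assistant_response TEXT, feedback INTEGER DEFAULT 0)"
        )
        self.conn.commit()

    def save_conversation(self, user_input, assistant_response):
        # Returns the row id, which app.py stores in current_conv_id.
        cur = self.conn.execute(
            "INSERT INTO conversations (user_input, assistant_response) VALUES (?, ?)",
            (user_input, assistant_response),
        )
        self.conn.commit()
        return cur.lastrowid

    def update_feedback(self, conv_id, value):
        # value is 1 from feedback_good() and -1 from feedback_bad().
        self.conn.execute(
            "UPDATE conversations SET feedback = ? WHERE id = ?", (value, conv_id)
        )
        self.conn.commit()

    def get_good_conversations(self):
        # retrain() expects dicts with 'user_input' and 'assistant_response' keys.
        rows = self.conn.execute(
            "SELECT user_input, assistant_response FROM conversations WHERE feedback = 1"
        ).fetchall()
        return [{"user_input": u, "assistant_response": a} for u, a in rows]

    def get_stats(self):
        # get_stats() in app.py reads 'total', 'positive' and 'negative'.
        total = self.conn.execute("SELECT COUNT(*) FROM conversations").fetchone()[0]
        positive = self.conn.execute(
            "SELECT COUNT(*) FROM conversations WHERE feedback = 1"
        ).fetchone()[0]
        negative = self.conn.execute(
            "SELECT COUNT(*) FROM conversations WHERE feedback = -1"
        ).fetchone()[0]
        return {"total": total, "positive": positive, "negative": negative}

db = ConversationDB()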