Spaces:

vedaco
/

veda-programming

Running

App Files Files Community

vedaco committed 4 days ago

Commit

19602e4

verified ·

1 Parent(s): 3096f4a

Update app.py

Browse files

Files changed (1) hide show

app.py +394 -162

app.py CHANGED Viewed

@@ -1,239 +1,471 @@
 import gradio as gr
-import tensorflow as tf
-import numpy as np
 import os
 import json
 from model import VedaProgrammingLLM
 from tokenizer import VedaTokenizer
 from train import VedaTrainer, SAMPLE_CODE
-# Global state
-model = None
-tokenizer = None
-def initialize_model():
-    """Initialize or load model"""
-    global model, tokenizer
-    model_path = "veda_model"
-    config_file = os.path.join(model_path, "config.json")
-    try:
-        if os.path.exists(config_file):
-            print("Loading saved model...")
-            with open(config_file, 'r') as f:
-                config = json.load(f)
-            tokenizer = VedaTokenizer()
-            tokenizer.load(os.path.join(model_path, "tokenizer.json"))
-            model = VedaProgrammingLLM(
-                vocab_size=config['vocab_size'],
-                max_length=config['max_length'],
-                d_model=config['d_model'],
-                num_heads=config['num_heads'],
-                num_layers=config['num_layers'],
-                ff_dim=config['ff_dim']
-            )
-            dummy = tf.zeros((1, config['max_length']), dtype=tf.int32)
-            model(dummy)
-            model.load_weights(os.path.join(model_path, "weights.h5"))
-            print("Model loaded!")
         else:
-            print("Training new model...")
-            trainer = VedaTrainer(
-                data_path="programming.txt",
-                vocab_size=3000,
-                max_length=128,
-                batch_size=8
-            )
-            trainer.train(epochs=5, save_path=model_path)
-            model = trainer.model
-            tokenizer = trainer.tokenizer
-            print("Model trained!")
-    except Exception as e:
-        print(f"Error: {e}")
-        print("Creating fresh model...")
-        trainer = VedaTrainer()
-        trainer.train(epochs=5)
-        model = trainer.model
-        tokenizer = trainer.tokenizer
-def generate_code(prompt: str, max_tokens: int, temperature: float, top_k: int) -> str:
-    """Generate code from prompt"""
-    global model, tokenizer
-    if model is None or tokenizer is None:
-        return "Model not loaded. Please wait..."
     try:
         if not prompt.strip():
-            return "Please enter a prompt."
-        tokens = tokenizer.encode(prompt)
-        if len(tokens) == 0:
-            tokens = [2]  # START token
-        generated = model.generate(
-            tokens,
-            max_new_tokens=int(max_tokens),
             temperature=float(temperature),
             top_k=int(top_k)
         )
-        result = tokenizer.decode(generated)
-        return result
-    except Exception as e:
-        return f"Error: {str(e)}"
-def train_on_data(training_data: str, epochs: int) -> str:
-    """Train model on provided data"""
-    global model, tokenizer
-    try:
-        with open("programming.txt", 'w') as f:
-            f.write(training_data)
-        trainer = VedaTrainer(
-            data_path="programming.txt",
-            vocab_size=3000,
-            max_length=128,
-            batch_size=8
         )
-        history = trainer.train(epochs=int(epochs), save_path="veda_model")
-        model = trainer.model
-        tokenizer = trainer.tokenizer
-        final_loss = history.history['loss'][-1]
-        final_acc = history.history.get('accuracy', [0])[-1]
         return f"""✅ Training Complete!
-Loss: {final_loss:.4f}
-Accuracy: {final_acc:.4f}
-Epochs: {epochs}
-Vocab Size: {tokenizer.vocabulary_size}
 """
-    except Exception as e:
-        return f"❌ Training Error: {str(e)}"
-def get_model_info() -> str:
-    """Get model information"""
-    global model, tokenizer
-    if model is None:
-        return "No model loaded."
-    config = model.get_config()
-    params = model.count_params()
     return f"""## 🕉️ Veda Programming LLM
 | Property | Value |
 |----------|-------|
-| Vocabulary Size | {config['vocab_size']} |
-| Max Length | {config['max_length']} |
-| Model Dimension | {config['d_model']} |
-| Attention Heads | {config['num_heads']} |
-| Transformer Layers | {config['num_layers']} |
-| FFN Dimension | {config['ff_dim']} |
-| **Total Parameters** | **{params:,}** |
 """
-# Build interface
 def create_app():
-    with gr.Blocks(title="Veda Programming", theme=gr.themes.Soft()) as app:
         gr.Markdown("""
         # 🕉️ Veda Programming LLM
-        ### TensorFlow-based Code Generation Model
         """)
         with gr.Tabs():
-            # Generation Tab
             with gr.TabItem("💻 Generate Code"):
                 with gr.Row():
-                    with gr.Column():
                         prompt = gr.Textbox(
-                            label="Code Prompt",
-                            placeholder="def fibonacci(",
-                            lines=3,
-                            value="def calculate_sum("
                         )
                         with gr.Row():
-                            max_tokens = gr.Slider(10, 200, value=50, step=5, label="Max Tokens")
-                            temperature = gr.Slider(0.1, 1.5, value=0.8, step=0.1, label="Temperature")
-                        top_k = gr.Slider(1, 100, value=40, step=5, label="Top-K")
-                        gen_btn = gr.Button("🚀 Generate", variant="primary")
-                    with gr.Column():
-                        output = gr.Code(label="Generated Code", language="python", lines=12)
                 gen_btn.click(
                     generate_code,
-                    inputs=[prompt, max_tokens, temperature, top_k],
-                    outputs=output
                 )
                 gr.Examples(
                     examples=[
-                        ["def fibonacci(n):", 60, 0.7, 40],
-                        ["def bubble_sort(arr):", 80, 0.8, 40],
-                        ["class Calculator:", 100, 0.7, 50],
-                        ["def binary_search(", 70, 0.8, 40],
                     ],
-                    inputs=[prompt, max_tokens, temperature, top_k],
-                    outputs=output,
-                    fn=generate_code
                 )
-            # Training Tab
-            with gr.TabItem("🎓 Train Model"):
-                training_input = gr.Textbox(
-                    label="Training Code",
-                    placeholder="Paste Python code...",
-                    lines=12,
-                    value=SAMPLE_CODE[:1500]
-                )
-                epochs_slider = gr.Slider(1, 20, value=5, step=1, label="Epochs")
-                train_btn = gr.Button("🎯 Train Model", variant="primary")
-                train_output = gr.Textbox(label="Training Results", lines=8)
-                train_btn.click(
-                    train_on_data,
-                    inputs=[training_input, epochs_slider],
-                    outputs=train_output
-                )
-            # Info Tab
             with gr.TabItem("ℹ️ Model Info"):
                 info_output = gr.Markdown()
-                refresh_btn = gr.Button("🔄 Refresh")
-                refresh_btn.click(get_model_info, outputs=info_output)
         gr.Markdown("""
         ---
-        **Veda Programming LLM** | Built with TensorFlow & Gradio
         """)
     return app
-# Main
-print("🕉️ Initializing Veda Programming LLM...")
-initialize_model()
-print("🚀 Starting Gradio...")
-app = create_app()
-app.launch(server_name="0.0.0.0", server_port=7860)

+"""Gradio interface for Veda Programming LLM with continuous learning"""
 import gradio as gr
 import os
 import json
+from datetime import datetime
 from model import VedaProgrammingLLM
 from tokenizer import VedaTokenizer
+from data_collector import collector
+from continuous_trainer import trainer
+from database import db
 from train import VedaTrainer, SAMPLE_CODE
+from config import (
+    MODEL_DIR, DEFAULT_TEMPERATURE, DEFAULT_MAX_TOKENS,
+    DEFAULT_REPETITION_PENALTY, DEFAULT_TOP_K
+)
+# Id last returned by collector.collect_interaction(); written by generate_code(), None before the first saved generation
+current_interaction_id = None
def initialize():
    """Initialize the system.

    Asks the continuous trainer to load an existing model. When that fails,
    an initial model is trained with VedaTrainer (save_path=MODEL_DIR) and
    the continuous trainer is asked to load again. The auto-training
    scheduler is started in both cases.

    The outcome is reported on stdout only; nothing is returned.
    """
    print("🕉️ Initializing Veda Programming LLM...")
    print("=" * 50)

    model_ready = trainer.load_model()
    if model_ready:
        print("✅ Existing model loaded")
    else:
        print("📚 Training initial model...")
        initial_trainer = VedaTrainer(
            data_path="programming.txt",
            vocab_size=5000,
            max_length=256,
            batch_size=8
        )
        initial_trainer.train(epochs=10, save_path=MODEL_DIR)
        # The continuous trainer still has to pick up what was just trained;
        # keep the result so a failed reload is not reported as success.
        model_ready = trainer.load_model()

    # Start auto-training scheduler
    trainer.start_auto_training()

    print("=" * 50)
    if model_ready:
        print("✅ System ready!")
    else:
        print("⚠️ Model could not be loaded after initial training - generation is unavailable.")
def clean_output(text: str, max_blank_lines: int = 2) -> str:
    """Collapse long runs of blank lines in generated output.

    Args:
        text: Generated text; lines are split on the newline character.
        max_blank_lines: Largest number of consecutive blank (empty or
            whitespace-only) lines to keep. Defaults to 2.

    Returns:
        The text with every run of blank lines cut down to at most
        ``max_blank_lines``. Non-blank lines, and the blank lines that are
        kept, are returned unchanged.
    """
    kept = []
    blank_run = 0
    for line in text.split('\n'):
        if line.strip():
            # Any real content ends the current run of blank lines
            blank_run = 0
            kept.append(line)
            continue
        blank_run += 1
        if blank_run <= max_blank_lines:
            kept.append(line)
    return '\n'.join(kept)
def generate_code(
    prompt: str,
    max_tokens: int,
    temperature: float,
    repetition_penalty: float,
    top_k: int
) -> tuple:
    """Generate code for a prompt and record the interaction.

    Args:
        prompt: Text handed to trainer.generate(); a blank or missing prompt
            is rejected with a message.
        max_tokens: Coerced with int() before use.
        temperature: Coerced with float() before use.
        repetition_penalty: Coerced with float() before use.
        top_k: Coerced with int() before use.

    Returns:
        A ``(text, interaction_id)`` pair. ``text`` is the cleaned generation
        or a status/error message. ``interaction_id`` is the value returned
        by collector.collect_interaction(), or -1 when there is nothing to
        rate (model not loaded, blank prompt, generation failed, or the
        interaction could not be saved).
    """
    global current_interaction_id
    import traceback

    if trainer.model is None:
        return "⏳ Model loading...", -1

    try:
        if not prompt or not prompt.strip():
            return "⚠️ Please enter a prompt.", -1

        # Coerce once so generation and the stored record use the same values
        max_tokens = int(max_tokens)
        temperature = float(temperature)

        result = trainer.generate(
            prompt=prompt,
            max_tokens=max_tokens,
            temperature=temperature,
            repetition_penalty=float(repetition_penalty),
            top_k=int(top_k)
        )
        result = clean_output(result)
    except Exception as e:
        traceback.print_exc()
        return f"❌ Error: {str(e)}", -1

    try:
        saved_id = collector.collect_interaction(
            prompt=prompt,
            generated_code=result,
            temperature=temperature,
            max_tokens=max_tokens
        )
    except Exception:
        # Failing to save the record must not hide a successful generation;
        # the output is simply returned as not rateable.
        traceback.print_exc()
        return result, -1

    if saved_id is None:
        # Keep the UI state on the -1 sentinel rather than storing None
        return result, -1

    current_interaction_id = saved_id
    return result, saved_id
def submit_feedback(interaction_id: int, is_positive: bool, edited_code: str = None):
    """Submit feedback for generated code.

    Args:
        interaction_id: Id of the interaction being rated. A negative value
            (the UI uses -1) or None means there is nothing to rate.
        is_positive: True for a 👍 rating, False for 👎.
        edited_code: Code text to store with the rating; empty or
            whitespace-only text is recorded as None.

    Returns:
        A status message for display.
    """
    # None is treated like the -1 sentinel instead of failing the comparison
    if interaction_id is None or interaction_id < 0:
        return "⚠️ No interaction to rate"

    has_edit = bool(edited_code and edited_code.strip())
    collector.record_feedback(
        interaction_id=interaction_id,
        is_positive=is_positive,
        edited_code=edited_code if has_edit else None
    )

    emoji = "👍" if is_positive else "👎"
    pending = collector.get_pending_count()

    parts = [
        f"{emoji} Feedback recorded! Thank you for helping improve the model.",
        f"📊 Approved samples pending training: {pending}",
    ]
    if trainer.should_retrain():
        parts.append("🔄 Enough samples collected - model will be retrained soon!")
    return "\n".join(parts)
def positive_feedback(interaction_id, code):
    """Record a 👍 rating via submit_feedback().

    Args:
        interaction_id: Id taken from the UI state; converted with int().
        code: Current contents of the code editor, passed on as edited_code.

    Returns:
        The status message produced by submit_feedback().
    """
    try:
        interaction_id = int(interaction_id)
    except (TypeError, ValueError):
        # A missing or malformed id is handed on as the "nothing to rate"
        # sentinel instead of raising inside the click handler.
        interaction_id = -1
    return submit_feedback(interaction_id, True, code)
def negative_feedback(interaction_id, code):
    """Record a 👎 rating via submit_feedback().

    Args:
        interaction_id: Id taken from the UI state; converted with int().
        code: Current contents of the code editor, passed on as edited_code.

    Returns:
        The status message produced by submit_feedback().
    """
    try:
        interaction_id = int(interaction_id)
    except (TypeError, ValueError):
        # A missing or malformed id is handed on as the "nothing to rate"
        # sentinel instead of raising inside the click handler.
        interaction_id = -1
    return submit_feedback(interaction_id, False, code)
def manual_train(epochs: int):
    """Manually trigger training and report the outcome as text.

    Args:
        epochs: Number of epochs; coerced with int() before being passed to
            trainer.train().

    Returns:
        A status message: a notice when training is already running, a
        results summary when the returned dict has status 'success', or an
        error message otherwise (including when training raises).
    """
    if trainer.is_training:
        return "⏳ Training already in progress..."

    try:
        result = trainer.train(epochs=int(epochs))

        if result['status'] != 'success':
            return f"❌ Training Error: {result.get('message', 'unknown error')}"

        return f"""✅ Training Complete!
📊 Results:
- Version: {result['version']}
- Loss: {result['loss']:.4f}
- Accuracy: {result['accuracy']:.4f}
- Samples Used: {result['samples_used']}
"""
    except Exception as e:
        # Report failures in-band, the same way generate_code() does, so the
        # output box shows a message instead of the callback erroring out.
        import traceback
        traceback.print_exc()
        return f"❌ Training Error: {str(e)}"
def add_training_code(code: str, category: str):
    """Add code directly to training data.

    Args:
        code: Source text to hand to collector.add_training_sample(). Missing,
            empty or whitespace-only text is rejected.
        category: Label passed along with the sample.

    Returns:
        A status message for display.
    """
    # `not code` also covers None, which would otherwise fail on .strip()
    if not code or not code.strip():
        return "⚠️ Please enter some code"

    collector.add_training_sample(code, category)
    return f"✅ Code added to training data!\nCategory: {category}"
def get_statistics():
    """Render the statistics report as Markdown.

    Combines collector.get_statistics() (interaction and sample counts) with
    trainer.get_status() (model version, training state, pending samples)
    into one Markdown document made of four tables.
    """
    counts = collector.get_statistics()
    state = trainer.get_status()

    # Model status cells, looked up in the order they appear in the table
    model_version = state['model_version']
    training_flag = 'Yes' if state['is_training'] else 'No'
    progress = state['training_progress']
    last_training = state['last_training'] or 'Never'

    return f"""## 📊 System Statistics
### Model Status
| Property | Value |
|----------|-------|
| 🤖 Model Version | {model_version} |
| 🔄 Currently Training | {training_flag} |
| 📈 Training Progress | {progress:.0f}% |
| ⏰ Last Training | {last_training} |
### Learning Data
| Metric | Count |
|--------|-------|
| 💬 Total Interactions | {counts['total_interactions']} |
| 👍 Positive Feedback | {counts['positive_feedback']} |
| 👎 Negative Feedback | {counts['negative_feedback']} |
| ✅ Approved Samples | {counts['approved_samples']} |
| 📚 Pending for Training | {state['pending_samples']} |
| 🎯 Min Samples to Retrain | {state['min_samples_for_training']} |
### Training History
| Metric | Value |
|--------|-------|
| 🔄 Total Training Runs | {counts['training_runs']} |
| 📝 Code Samples | {counts['code_samples']} |
### Last 7 Days
| Metric | Count |
|--------|-------|
| 🔢 Generations | {counts['recent_generations']} |
| 👍 Positive | {counts['recent_positive']} |
| 👎 Negative | {counts['recent_negative']} |
| 📈 Approval Rate | {counts['approval_rate']:.1f}% |
"""
def get_recent_interactions():
    """Render the ten most recent interactions as Markdown for review.

    Each entry shows the timestamp as a heading, the first 50 characters of
    the prompt, and the feedback state: 👍 for a positive value, 👎 for a
    negative one, ⏳ otherwise.

    Returns:
        A Markdown string, or "No interactions yet." when the database
        returns nothing.
    """
    interactions = db.get_recent_interactions(limit=10)
    if not interactions:
        return "No interactions yet."

    sections = ["## Recent Interactions\n\n"]
    for item in interactions:
        # A missing rating (None) is shown as pending rather than raising
        rating = item['feedback'] or 0
        if rating > 0:
            feedback = "👍"
        elif rating < 0:
            feedback = "👎"
        else:
            feedback = "⏳"

        prompt = item['prompt'] or ""
        # Only mark the preview as shortened when something was cut off
        preview = prompt[:50] + ("..." if len(prompt) > 50 else "")

        # Explicit blank lines: a `---` directly under a paragraph would be
        # parsed as a setext heading underline instead of a horizontal rule.
        sections.append(
            f"### {item['timestamp']}\n\n"
            f"**Prompt:** `{preview}`\n\n"
            f"**Feedback:** {feedback}\n\n"
            "---\n\n"
        )
    return "".join(sections)
def get_training_history():
    """Render the ten most recent training runs as a Markdown table.

    Returns:
        A Markdown string with one table row per run (date, version, samples,
        loss, accuracy), or "No training history yet." when the database
        returns nothing.
    """
    runs = db.get_training_history(limit=10)
    if not runs:
        return "No training history yet."

    header = (
        "## Training History\n\n"
        "| Date | Version | Samples | Loss | Accuracy |\n"
        "|------|---------|---------|------|----------|\n"
    )
    # Only the first 10 characters of the timestamp go into the Date column
    rows = [
        f"| {run['timestamp'][:10]} | {run['model_version']} | "
        f"{run['samples_used']} | {run['final_loss']:.4f} | {run['final_accuracy']:.4f} |\n"
        for run in runs
    ]
    return header + "".join(rows)
def get_model_info():
    """Render the loaded model's architecture as Markdown.

    Reads the configuration from trainer.model.get_config() and the parameter
    total from trainer.model.count_params().

    Returns:
        A Markdown string with an architecture table and a feature list, or
        "⏳ Model not loaded" when the trainer holds no model.
    """
    if trainer.model is None:
        return "⏳ Model not loaded"

    cfg = trainer.model.get_config()
    param_count = trainer.model.count_params()

    return f"""## 🕉️ Veda Programming LLM
### Architecture
| Property | Value |
|----------|-------|
| 📚 Vocabulary Size | {cfg['vocab_size']:,} |
| 📏 Max Sequence Length | {cfg['max_length']} |
| 🧠 Model Dimension | {cfg['d_model']} |
| 👁️ Attention Heads | {cfg['num_heads']} |
| 📦 Transformer Layers | {cfg['num_layers']} |
| 🔧 FFN Dimension | {cfg['ff_dim']} |
| ⚡ **Total Parameters** | **{param_count:,}** |
### Features
- ✅ Continuous Learning from User Feedback
- ✅ Automatic Retraining
- ✅ Repetition Penalty
- ✅ Top-K & Top-P Sampling
- ✅ Temperature Control
- ✅ Model Versioning
"""
# Create the interface
def create_app():
    """Build the Gradio Blocks interface and return it without launching.

    Tabs:
        - "Generate Code": prompt, sampling sliders, editable output and the
          👍/👎 rating buttons.
        - "Training": manual training and direct submission of training code.
        - "Statistics": system statistics, recent interactions and training
          history, each behind its own refresh button.
        - "Model Info": architecture summary plus usage notes.
    """
    custom_css = """
        .feedback-btn { min-width: 100px; }
        .positive { background-color: #4CAF50 !important; }
        .negative { background-color: #f44336 !important; }
        """

    # One row per example, in the same column order as the generation inputs
    example_rows = [
        ["def fibonacci(n):", 100, 0.7, 1.2, 50],
        ["def bubble_sort(arr):", 120, 0.7, 1.2, 50],
        ["class Calculator:", 150, 0.8, 1.3, 40],
        ["def binary_search(arr, target):", 100, 0.7, 1.2, 50],
    ]

    with gr.Blocks(
        title="Veda Programming LLM",
        theme=gr.themes.Soft(),
        css=custom_css
    ) as demo:
        # Carries the id returned by generate_code() to the rating buttons;
        # -1 means there is nothing to rate yet.
        interaction_id = gr.State(value=-1)

        gr.Markdown("""
        # 🕉️ Veda Programming LLM
        ### AI Code Generation with Continuous Learning
        This model learns from your feedback! Rate generated code to help improve it.
        """)

        with gr.Tabs():
            # ---------------- Generation tab ----------------
            with gr.TabItem("💻 Generate Code"):
                with gr.Row():
                    # Left column: prompt and sampling controls
                    with gr.Column(scale=1):
                        prompt = gr.Textbox(
                            label="📝 Code Prompt",
                            placeholder="Enter your code prompt...",
                            lines=4,
                            value="def fibonacci(n):"
                        )
                        with gr.Row():
                            max_tokens = gr.Slider(
                                10, 300,
                                value=DEFAULT_MAX_TOKENS,
                                step=10,
                                label="📏 Max Tokens"
                            )
                            temperature = gr.Slider(
                                0.1, 1.5,
                                value=DEFAULT_TEMPERATURE,
                                step=0.1,
                                label="🌡️ Temperature"
                            )
                        with gr.Row():
                            repetition_penalty = gr.Slider(
                                1.0, 2.0,
                                value=DEFAULT_REPETITION_PENALTY,
                                step=0.1,
                                label="🔄 Repetition Penalty"
                            )
                            top_k = gr.Slider(
                                10, 100,
                                value=DEFAULT_TOP_K,
                                step=5,
                                label="🎯 Top-K"
                            )
                        generate_button = gr.Button("🚀 Generate Code", variant="primary", size="lg")

                    # Right column: editable output and rating controls
                    with gr.Column(scale=1):
                        output = gr.Code(
                            label="📄 Generated Code (Edit if needed before rating)",
                            language="python",
                            lines=15,
                            interactive=True
                        )
                        gr.Markdown("### 📊 Rate this output to help improve the model:")
                        with gr.Row():
                            like_button = gr.Button(
                                "👍 Good",
                                variant="primary",
                                elem_classes=["feedback-btn", "positive"]
                            )
                            dislike_button = gr.Button(
                                "👎 Bad",
                                variant="secondary",
                                elem_classes=["feedback-btn", "negative"]
                            )
                        feedback_output = gr.Textbox(label="Feedback Status", lines=2)

                generation_inputs = [prompt, max_tokens, temperature, repetition_penalty, top_k]
                rating_inputs = [interaction_id, output]

                # Generation fills the editor and stores the interaction id
                generate_button.click(
                    generate_code,
                    inputs=generation_inputs,
                    outputs=[output, interaction_id]
                )

                # Both ratings send the stored id plus the editor contents
                like_button.click(
                    positive_feedback,
                    inputs=rating_inputs,
                    outputs=feedback_output
                )
                dislike_button.click(
                    negative_feedback,
                    inputs=rating_inputs,
                    outputs=feedback_output
                )

                gr.Markdown("### 💡 Example Prompts")
                gr.Examples(
                    examples=example_rows,
                    inputs=generation_inputs
                )

            # ---------------- Training tab ----------------
            with gr.TabItem("🎓 Training"):
                with gr.Row():
                    with gr.Column():
                        gr.Markdown("### 🔄 Manual Training")
                        gr.Markdown("Trigger training on collected approved samples.")
                        epochs_slider = gr.Slider(1, 20, value=5, step=1, label="Epochs")
                        train_button = gr.Button("🎯 Start Training", variant="primary")
                        train_output = gr.Textbox(label="Training Output", lines=8)
                        train_button.click(
                            manual_train,
                            inputs=[epochs_slider],
                            outputs=train_output
                        )

                    with gr.Column():
                        gr.Markdown("### 📝 Add Training Code")
                        gr.Markdown("Contribute code directly to the training dataset.")
                        code_input = gr.Textbox(
                            label="Code",
                            placeholder="Paste your Python code here...",
                            lines=10
                        )
                        category = gr.Dropdown(
                            choices=["function", "class", "algorithm", "utility", "other"],
                            value="function",
                            label="Category"
                        )
                        add_button = gr.Button("➕ Add to Training Data")
                        add_output = gr.Textbox(label="Status")
                        add_button.click(
                            add_training_code,
                            inputs=[code_input, category],
                            outputs=add_output
                        )

            # ---------------- Statistics tab ----------------
            with gr.TabItem("📊 Statistics"):
                stats_output = gr.Markdown()
                stats_button = gr.Button("🔄 Refresh Statistics")
                stats_button.click(get_statistics, outputs=stats_output)

                gr.Markdown("---")

                with gr.Row():
                    with gr.Column():
                        interactions_output = gr.Markdown()
                        interactions_button = gr.Button("🔄 Refresh Interactions")
                        interactions_button.click(
                            get_recent_interactions,
                            outputs=interactions_output
                        )
                    with gr.Column():
                        history_output = gr.Markdown()
                        history_button = gr.Button("🔄 Refresh History")
                        history_button.click(
                            get_training_history,
                            outputs=history_output
                        )

            # ---------------- Model info tab ----------------
            with gr.TabItem("ℹ️ Model Info"):
                info_output = gr.Markdown()
                info_button = gr.Button("🔄 Refresh Info")
                info_button.click(get_model_info, outputs=info_output)

                gr.Markdown("""
                ### 🧠 How Continuous Learning Works
                1. **You generate code** using the model
                2. **You rate the output** (👍 or 👎)
                3. **Good outputs are saved** for training
                4. **When enough samples collect**, the model retrains
                5. **The model improves** based on your feedback!
                ### 💡 Tips
                - Rate outputs honestly to help the model learn
                - Edit code before rating if it's close but not perfect
                - The more you use it, the better it gets!
                - Contribute your own code samples for faster learning
                """)

        gr.Markdown("""
        ---
        **🕉️ Veda Programming LLM** | Continuous Learning System |
        Built with TensorFlow & Gradio
        """)

    return demo
# Script entry point: prepare the model first, then serve the UI
if __name__ == "__main__":
    initialize()
    print("\n🚀 Starting Gradio Interface...")

    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
    }
    app = create_app()
    app.launch(**launch_options)