Spaces:
Runtime error
Runtime error
Update Gradio app with multiple files
Browse files- model_server.py +285 -0
- requirements.txt +2 -0
- run_client.py +144 -0
- terminal_chatbot.py +297 -0
- updated_app.py +271 -0
- updated_models.py +213 -0
model_server.py
ADDED
|
@@ -0,0 +1,285 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
AI Coding Model Server
|
| 4 |
+
FastAPI server that hosts the 5B parameter coding model
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import torch
|
| 8 |
+
import spaces
|
| 9 |
+
import uvicorn
|
| 10 |
+
from fastapi import FastAPI, HTTPException, BackgroundTasks
|
| 11 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 12 |
+
from pydantic import BaseModel, Field
|
| 13 |
+
from typing import List, Dict, Any, Optional
|
| 14 |
+
import logging
|
| 15 |
+
import os
|
| 16 |
+
import asyncio
|
| 17 |
+
import threading
|
| 18 |
+
from contextlib import asynccontextmanager
|
| 19 |
+
|
| 20 |
+
# Import model components
|
| 21 |
+
from models import CodeModel
|
| 22 |
+
from utils import format_code_response, validate_code_syntax
|
| 23 |
+
|
| 24 |
+
# Configure logging
|
| 25 |
+
logging.basicConfig(level=logging.INFO)
|
| 26 |
+
logger = logging.getLogger(__name__)
|
| 27 |
+
|
| 28 |
+
# Global model instance
|
| 29 |
+
code_model = None
|
| 30 |
+
model_loading = False
|
| 31 |
+
|
| 32 |
+
class ChatMessage(BaseModel):
    """Request body for POST /api/chat.

    Carries the new user message plus the prior conversation so the
    server can remain stateless between calls.
    """
    message: str = Field(..., description="User's message")
    # Prior turns as [{"role": ..., "content": ...}] dicts; the handler
    # appends the new user/assistant turns and echoes the list back.
    history: List[Dict[str, str]] = Field(default_factory=list, description="Chat history")
    language: str = Field(default="python", description="Target programming language")
    # Pydantic enforces the 0.1-1.0 bound (ge/le) before the handler runs.
    temperature: float = Field(default=0.7, ge=0.1, le=1.0, description="Generation temperature")
|
| 38 |
+
|
| 39 |
+
class ChatResponse(BaseModel):
    """Response body for POST /api/chat.

    Mirrors the OpenAI-style ``choices[0].message.content`` shape that
    the terminal client reads.
    """
    choices: List[Dict[str, Dict[str, str]]] = Field(..., description="Generated responses")
    # Full conversation including the turns added by this request.
    history: List[Dict[str, str]] = Field(..., description="Updated chat history")
    # Not populated by the current /api/chat handler; reserved for future use.
    usage: Optional[Dict[str, int]] = Field(None, description="Token usage information")
|
| 44 |
+
|
| 45 |
+
class HealthResponse(BaseModel):
    """Response body for GET /health."""
    # "loading" while the model loads, "healthy" once it is ready.
    status: str
    model_loaded: bool
    model_name: str
    device: str
    # CUDA memory stats in GB (allocated/cached/total); None on CPU.
    memory_usage: Optional[Dict[str, Any]] = None
|
| 52 |
+
|
| 53 |
+
class ModelInfoResponse(BaseModel):
    """Response body for GET /model/info.

    Field names must match the keys returned by
    ``CodeModel.get_model_info()``, which is splatted directly into
    this model by the endpoint.
    """
    model_name: str
    parameter_count: str
    max_length: int
    device: str
    is_loaded: bool
    vocab_size: int
|
| 61 |
+
|
| 62 |
+
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan management.

    FastAPI runs the code before ``yield`` at startup and the code
    after it at shutdown. The model is loaded here so the server can
    accept connections immediately and report "loading" via /health.
    """
    # Startup: kick off (and await) the one-time model load.
    logger.info("Starting up AI Coding Model Server...")
    await load_model()

    yield

    # Shutdown
    logger.info("Shutting down server...")
|
| 73 |
+
|
| 74 |
+
async def load_model():
    """Load the model into the module-global ``code_model``, once.

    Safe to call multiple times: returns immediately if the model is
    already loaded or a load is in progress. On failure, ``code_model``
    is reset to None so /health keeps reporting 503.
    """
    global code_model, model_loading

    # Idempotence guard: never start a second concurrent load.
    if code_model is not None or model_loading:
        return

    model_loading = True
    logger.info("Loading coding model...")

    try:
        # Run the (blocking) model construction in the default thread
        # pool so the event loop stays responsive.
        # NOTE: get_running_loop() replaces the deprecated
        # get_event_loop() inside coroutines.
        loop = asyncio.get_running_loop()
        code_model = await loop.run_in_executor(None, CodeModel)

        if code_model.is_loaded:
            logger.info(f"✅ Model loaded successfully: {code_model.model_name}")
        else:
            logger.error("❌ Failed to load model")

    except Exception as e:
        logger.error(f"❌ Error loading model: {e}")
        code_model = None
    finally:
        # Always clear the flag so a failed load can be retried.
        model_loading = False
|
| 99 |
+
|
| 100 |
+
def create_app() -> FastAPI:
    """Create and configure the FastAPI application.

    Endpoints are defined as closures so they can read the module-level
    ``code_model`` / ``model_loading`` state that ``lifespan`` manages.
    """

    # Create FastAPI app with lifespan management (loads the model on startup).
    app = FastAPI(
        title="AI Coding Model Server",
        description="FastAPI server hosting a 5B parameter coding model",
        version="1.0.0",
        lifespan=lifespan
    )

    # Add CORS middleware so browser clients (e.g. the Gradio app) can call us.
    app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],  # Configure appropriately for production
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )

    @app.get("/", response_model=Dict[str, str])
    async def root():
        """Root endpoint."""
        return {
            "message": "AI Coding Model Server",
            "version": "1.0.0",
            "status": "running" if code_model and code_model.is_loaded else "loading"
        }

    @app.get("/health", response_model=HealthResponse)
    async def health_check():
        """Health check endpoint.

        Returns 200 with status="loading" while the model loads, 503 if
        loading finished without a usable model, and full stats otherwise.
        """
        if model_loading:
            return HealthResponse(
                status="loading",
                model_loaded=False,
                model_name="Loading...",
                device="unknown"
            )

        if not code_model or not code_model.is_loaded:
            raise HTTPException(status_code=503, detail="Model not loaded")

        # Get memory usage if available (CUDA only; None on CPU).
        memory_info = None
        if torch.cuda.is_available():
            memory_info = {
                "allocated": torch.cuda.memory_allocated() / 1024**3,  # GB
                "cached": torch.cuda.memory_reserved() / 1024**3,  # GB
                "total": torch.cuda.get_device_properties(0).total_memory / 1024**3
            }

        return HealthResponse(
            status="healthy",
            model_loaded=True,
            model_name=code_model.model_name,
            device=code_model.device,
            memory_usage=memory_info
        )

    @app.get("/model/info", response_model=ModelInfoResponse)
    async def model_info():
        """Get detailed model information."""
        if not code_model:
            raise HTTPException(status_code=503, detail="Model not loaded")

        # get_model_info() keys must match ModelInfoResponse fields.
        info = code_model.get_model_info()
        return ModelInfoResponse(**info)

    @app.post("/api/chat", response_model=ChatResponse)
    async def chat(request: ChatMessage):
        """Main chat endpoint: generate a reply and return updated history."""
        if model_loading:
            raise HTTPException(status_code=503, detail="Model is still loading")

        if not code_model or not code_model.is_loaded:
            raise HTTPException(status_code=503, detail="Model not loaded")

        try:
            # Generate response using the model; copy so the request's
            # history is never mutated in place.
            messages = request.history.copy()
            messages.append({"role": "user", "content": request.message})

            response_text = code_model.generate(
                messages=messages,
                temperature=request.temperature,
                max_new_tokens=2048,
                language=request.language
            )

            # Format the response (markdown/code-fence cleanup).
            formatted_response = format_code_response(response_text)

            # Update chat history with both new turns for the client to keep.
            new_history = request.history.copy()
            new_history.append({"role": "user", "content": request.message})
            new_history.append({"role": "assistant", "content": formatted_response})

            return ChatResponse(
                choices=[{"message": {"content": formatted_response}}],
                history=new_history
            )

        except Exception as e:
            logger.error(f"Chat error: {e}")
            raise HTTPException(status_code=500, detail=f"Generation error: {str(e)}")

    @app.post("/api/validate-code")
    async def validate_code(request: Dict[str, Any]):
        """Validate code syntax."""
        code = request.get("code", "")
        language = request.get("language", "python")

        if not code:
            raise HTTPException(status_code=400, detail="No code provided")

        validation_result = validate_code_syntax(code, language)
        return validation_result

    @app.get("/api/languages")
    async def get_supported_languages():
        """Get list of supported programming languages."""
        return {
            "languages": [
                "python", "javascript", "java", "cpp", "c", "go", "rust",
                "typescript", "php", "ruby", "swift", "kotlin", "sql",
                "html", "css", "bash", "powershell"
            ]
        }

    return app
|
| 231 |
+
|
| 232 |
+
def run_server(host: str = "0.0.0.0", port: int = 8000, reload: bool = False):
    """Run the FastAPI server.

    Args:
        host: Bind address for uvicorn.
        port: TCP port to listen on.
        reload: Enable uvicorn's auto-reload (development only).
    """
    console_info = f"""
🚀 AI Coding Model Server Starting...

📊 Server Info:
• Host: {host}
• Port: {port}
• Model: Loading...
• Device: {'CUDA' if torch.cuda.is_available() else 'CPU'}

🔗 Endpoints:
• Health: http://{host}:{port}/health
• Model Info: http://{host}:{port}/model/info
• Chat: http://{host}:{port}/api/chat
• API Docs: http://{host}:{port}/docs

💡 Usage:
• Terminal client: python terminal_chatbot.py
• API calls: POST to /api/chat with chat messages
• Check status: GET /health

⚡ Server is ready! Press Ctrl+C to stop.
"""

    print(console_info)

    # Run server.
    # BUG FIX: "model_server:create_app" points at an app *factory*, so
    # uvicorn needs factory=True — without it, uvicorn treats the factory
    # function itself as the ASGI app and crashes on the first request.
    # The import-string form (rather than a pre-built app instance) is
    # required for --reload to work. The previous unused local
    # `app = create_app()` has been dropped.
    uvicorn.run(
        "model_server:create_app",
        factory=True,
        host=host,
        port=port,
        reload=reload,
        log_level="info",
        access_log=True
    )
|
| 270 |
+
|
| 271 |
+
if __name__ == "__main__":
    import argparse

    # Command-line entry point: parse the server flags, then delegate to
    # run_server() with the resulting values.
    arg_parser = argparse.ArgumentParser(description="AI Coding Model Server")
    arg_parser.add_argument("--host", default="0.0.0.0", help="Server host")
    arg_parser.add_argument("--port", type=int, default=8000, help="Server port")
    arg_parser.add_argument("--reload", action="store_true", help="Auto-reload on changes")

    options = arg_parser.parse_args()
    run_server(host=options.host, port=options.port, reload=options.reload)
|
requirements.txt
CHANGED
|
@@ -19,3 +19,5 @@ matplotlib
|
|
| 19 |
seaborn
|
| 20 |
jupyter
|
| 21 |
ipywidgets
|
|
|
|
|
|
|
|
|
| 19 |
seaborn
|
| 20 |
jupyter
|
| 21 |
ipywidgets
|
| 22 |
+
rich
|
| 23 |
+
pydantic
|
run_client.py
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Easy launcher for the terminal chatbot
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import subprocess
|
| 7 |
+
import sys
|
| 8 |
+
import os
|
| 9 |
+
|
| 10 |
+
def main():
    """Interactive launcher: verify required files, then start the server,
    the terminal chatbot, or both.

    Exits with status 1 on missing files or an invalid menu choice.
    """
    print("🤖 AI Coding Assistant Terminal Launcher")
    print("=" * 50)

    # BUG FIX: the original only checked model_server.py and
    # terminal_chatbot.py, yet printed "✅ models.py" / "✅ utils.py"
    # unconditionally. Check every required file before claiming success.
    required_files = ["model_server.py", "terminal_chatbot.py", "models.py", "utils.py"]
    missing = [name for name in required_files if not os.path.exists(name)]
    if missing:
        for name in missing:
            print(f"❌ {name} not found!")
        print("Make sure all files are in the same directory.")
        sys.exit(1)

    print("📋 Files found:")
    for name in required_files:
        print(f"   ✅ {name}")
    print()

    # Ask user what they want to run
    print("What would you like to run?")
    print("1. Start model server (required for chatbot)")
    print("2. Start terminal chatbot (requires running server)")
    print("3. Start both (server in background, then chatbot)")

    try:
        choice = input("\nEnter your choice (1-3): ").strip()
    except KeyboardInterrupt:
        print("\n👋 Goodbye!")
        sys.exit(0)

    if choice == "1":
        print("\n🚀 Starting model server...")
        print("💡 Server will run on http://localhost:8000")
        print("💡 Press Ctrl+C to stop")
        try:
            subprocess.run([sys.executable, "model_server.py"])
        except KeyboardInterrupt:
            print("\n🛑 Server stopped")

    elif choice == "2":
        print("\n🤖 Starting terminal chatbot...")
        print("💡 Make sure the server is running first!")
        print("💡 If you get connection errors, run option 1 first")
        try:
            subprocess.run([sys.executable, "terminal_chatbot.py"])
        except KeyboardInterrupt:
            print("\n👋 Chatbot stopped")

    elif choice == "3":
        print("\n🚀 Starting server in background...")
        print("💡 Server will run on http://localhost:8000")

        # Start server in background; we own this process and must reap it.
        server_process = subprocess.Popen([sys.executable, "model_server.py"])

        try:
            # Give the server a moment to bind its port before the client
            # tries to connect. (Crude; the chatbot also retries on its own.)
            print("⏳ Waiting for server to start...")
            import time
            time.sleep(5)

            print("🤖 Starting terminal chatbot...")
            subprocess.run([sys.executable, "terminal_chatbot.py"])

        except KeyboardInterrupt:
            print("\n🛑 Stopping...")
        finally:
            # Always clean up the background server process.
            print("🧹 Stopping server...")
            server_process.terminate()
            server_process.wait()
            print("✅ Server stopped")

    else:
        print("❌ Invalid choice. Please run again and select 1, 2, or 3.")
        sys.exit(1)

if __name__ == "__main__":
    main()
|
| 94 |
+
|
| 95 |
+
This creates a complete client-server architecture:
|
| 96 |
+
|
| 97 |
+
## 🚀 **Key Features:**
|
| 98 |
+
|
| 99 |
+
### **Terminal Chatbot (`terminal_chatbot.py`)**
|
| 100 |
+
- Beautiful CLI interface with Rich formatting
|
| 101 |
+
- Command support (`/help`, `/lang`, `/temp`, `/clear`, etc.)
|
| 102 |
+
- Real-time API communication
|
| 103 |
+
- Syntax-highlighted code display
|
| 104 |
+
- Conversation history management
|
| 105 |
+
|
| 106 |
+
### **Model Server (`model_server.py`)**
|
| 107 |
+
- FastAPI server hosting the 5B+ parameter model
|
| 108 |
+
- RESTful API endpoints for chat and model info
|
| 109 |
+
- Health monitoring and status checking
|
| 110 |
+
- CORS enabled for web clients
|
| 111 |
+
- Background model loading
|
| 112 |
+
|
| 113 |
+
### **Updated Gradio App (`updated_app.py`)**
|
| 114 |
+
- Works with the API server
|
| 115 |
+
- Real-time status monitoring
|
| 116 |
+
- Same features as before but via API
|
| 117 |
+
|
| 118 |
+
### **Easy Launcher (`run_client.py`)**
|
| 119 |
+
- Simple menu-driven interface
|
| 120 |
+
- Can start server, client, or both
|
| 121 |
+
- Error checking and guidance
|
| 122 |
+
|
| 123 |
+
## 📋 **How to Use:**
|
| 124 |
+
|
| 125 |
+
1. **Start the server:**
|
| 126 |
+
python model_server.py
|
| 127 |
+
|
| 128 |
+
2. **Start the terminal chatbot:**
|
| 129 |
+
python terminal_chatbot.py
|
| 130 |
+
|
| 131 |
+
3. **Or use the easy launcher:**
|
| 132 |
+
python run_client.py
|
| 133 |
+
|
| 134 |
+
4. **For the Gradio web interface:**
|
| 135 |
+
python updated_app.py
|
| 136 |
+
|
| 137 |
+
## 🔗 **API Endpoints:**
|
| 138 |
+
- `GET /health` - Check server status
|
| 139 |
+
- `GET /model/info` - Get model information
|
| 140 |
+
- `POST /api/chat` - Send chat messages
|
| 141 |
+
- `POST /api/validate-code` - Validate code syntax
|
| 142 |
+
- `GET /api/languages` - Get supported languages
|
| 143 |
+
|
| 144 |
+
The terminal chatbot provides a professional CLI experience with syntax highlighting, command support, and real-time API communication!
|
terminal_chatbot.py
ADDED
|
@@ -0,0 +1,297 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Terminal-based AI Coding Assistant
|
| 4 |
+
A command-line interface for the 5B parameter coding model via API
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import requests
|
| 8 |
+
import json
|
| 9 |
+
import time
|
| 10 |
+
import sys
|
| 11 |
+
import os
|
| 12 |
+
from typing import List, Dict, Any
|
| 13 |
+
from rich.console import Console
|
| 14 |
+
from rich.panel import Panel
|
| 15 |
+
from rich.syntax import Syntax
|
| 16 |
+
from rich.prompt import Prompt, Confirm
|
| 17 |
+
from rich.markdown import Markdown
|
| 18 |
+
from rich.table import Table
|
| 19 |
+
from rich import print as rprint
|
| 20 |
+
|
| 21 |
+
console = Console()
|
| 22 |
+
|
| 23 |
+
class TerminalChatbot:
    """Terminal-based chatbot client for the AI coding assistant.

    Talks to the model server's REST API and renders replies with Rich
    (markdown for prose, syntax highlighting for fenced code blocks).
    """

    def __init__(self, server_url: str = "http://localhost:8000"):
        self.server_url = server_url.rstrip('/')
        # BUG FIX: build the API URL from the *normalized* self.server_url,
        # not the raw argument — a trailing slash previously produced
        # ".../api/chat" with a double slash.
        self.api_url = f"{self.server_url}/api/chat"
        self.history: List[Dict[str, str]] = []
        self.current_language = "python"
        self.temperature = 0.7

    def check_server_connection(self) -> bool:
        """Check if the model server is running (GET /health, 5 s timeout)."""
        try:
            response = requests.get(f"{self.server_url}/health", timeout=5)
            return response.status_code == 200
        except requests.exceptions.RequestException:
            return False

    def send_message(self, message: str) -> Dict[str, Any]:
        """Send a message to the model server and get response.

        Always returns a dict in the server's response shape; transport or
        server errors are reported as the assistant's message content so
        the caller can render them uniformly.
        """
        try:
            payload = {
                "message": message,
                "history": self.history,
                "language": self.current_language,
                "temperature": self.temperature
            }

            response = requests.post(
                self.api_url,
                json=payload,
                headers={"Content-Type": "application/json"},
                timeout=60
            )

            if response.status_code == 200:
                return response.json()
            else:
                return {
                    "choices": [{"message": {"content": f"Server error: {response.status_code}"}}],
                    "history": self.history
                }

        except requests.exceptions.RequestException as e:
            return {
                "choices": [{"message": {"content": f"Connection error: {str(e)}"}}],
                "history": self.history
            }

    def format_response(self, response: str) -> None:
        """Format and display the model's response.

        Splits on ``` fences: even-indexed parts are prose (markdown),
        odd-indexed parts are code (syntax-highlighted).
        """
        if not response:
            return

        # Split response into code blocks and text
        parts = response.split('```')

        for i, part in enumerate(parts):
            if i % 2 == 0:  # Text parts
                if part.strip():
                    # BUG FIX: bare `except:` also swallowed
                    # KeyboardInterrupt/SystemExit; narrowed to Exception.
                    try:
                        markdown = Markdown(part.strip())
                        console.print(markdown)
                    except Exception:
                        console.print(part.strip())
            else:  # Code parts
                # First line of a fence is the (optional) language tag.
                lines = part.split('\n', 1)
                if len(lines) >= 2:
                    language = lines[0].strip() if lines[0].strip() else 'text'
                    code = lines[1]
                else:
                    language = 'text'
                    code = part

                if code.strip():
                    syntax = Syntax(code.strip(), language, theme="monokai", line_numbers=True)
                    console.print(syntax)

    def show_welcome(self) -> None:
        """Display welcome message and help."""
        welcome_text = """
# 🤖 AI Coding Assistant - Terminal Version

Welcome to your AI-powered coding companion! I can help you with:

• **Code Generation** - Write functions, classes, and complete programs
• **Debugging** - Find and fix errors in your code
• **Algorithm Implementation** - From simple to complex algorithms
• **Best Practices** - Clean, efficient, and readable code
• **Concept Explanation** - Understand programming concepts

## Quick Start Commands:
• `/help` - Show this help
• `/lang <language>` - Change programming language
• `/temp <value>` - Set creativity (0.1-1.0)
• `/clear` - Clear chat history
• `/quit` or `/exit` - Exit the program

## Example Prompts:
• "Write a Python function to reverse a linked list"
• "Create a React component for user authentication"
• "Explain Big O notation with code examples"
• "Debug this JavaScript code: [paste your code]"

**Ready to code? Just ask me anything!**
"""

        panel = Panel(
            Markdown(welcome_text),
            title="AI Coder Terminal",
            border_style="blue",
            padding=(1, 2)
        )
        console.print(panel)

    def show_settings(self) -> None:
        """Display current settings."""
        table = Table(title="Current Settings")
        table.add_column("Setting", style="cyan")
        table.add_column("Value", style="green")
        table.add_column("Description", style="yellow")

        table.add_row("Language", self.current_language, "Target programming language")
        table.add_row("Temperature", str(self.temperature), "Creativity level (0.1-1.0)")
        table.add_row("Server", self.server_url, "Model server URL")
        table.add_row("History", str(len(self.history)), "Messages in conversation")

        console.print(table)

    def handle_command(self, command: str) -> bool:
        """Handle special commands. Returns True if command was processed."""
        cmd = command.lower().strip()

        if cmd in ['/help', '/h']:
            self.show_help()
            return True
        elif cmd.startswith('/lang '):
            # Split the *original* command so language casing is preserved.
            language = command.split(' ', 1)[1].strip()
            self.current_language = language
            console.print(f"[green]✓[/green] Language set to: {language}")
            return True
        elif cmd.startswith('/temp '):
            try:
                temp = float(command.split(' ', 1)[1].strip())
                if 0.1 <= temp <= 1.0:
                    self.temperature = temp
                    console.print(f"[green]✓[/green] Temperature set to: {temp}")
                else:
                    console.print("[red]Temperature must be between 0.1 and 1.0[/red]")
            except ValueError:
                console.print("[red]Invalid temperature value[/red]")
            return True
        elif cmd in ['/settings', '/config']:
            self.show_settings()
            return True
        elif cmd in ['/clear', '/reset']:
            self.history = []
            console.print("[green]✓[/green] Chat history cleared")
            return True
        elif cmd in ['/quit', '/exit', '/q']:
            console.print("[yellow]Goodbye! 👋[/yellow]")
            sys.exit(0)
        else:
            console.print(f"[red]Unknown command: {command}[/red]")
            return True

    def show_help(self) -> None:
        """Display detailed help information."""
        help_text = """
# Available Commands

## Chat Commands
- **Regular text**: Ask questions or request code
- **/help** or **/h**: Show this help message
- **/settings**: Display current settings
- **/clear**: Clear chat history

## Configuration Commands
- **/lang <language>**: Change programming language
  - Example: `/lang javascript`
- **/temp <value>**: Set creativity level (0.1-1.0)
  - Example: `/temp 0.3` (more precise)
  - Example: `/temp 0.9` (more creative)

## Exit Commands
- **/quit** or **/exit**: Exit the program

## Programming Languages Supported
python, javascript, java, cpp, c, go, rust, typescript,
php, ruby, swift, kotlin, sql, html, css, bash, powershell
"""

        console.print(Panel(Markdown(help_text), title="Help", border_style="green"))

    def run(self) -> None:
        """Main chatbot loop."""
        self.show_welcome()

        # Check server connection before entering the loop.
        console.print("\n[yellow]Checking server connection...[/yellow]")
        if not self.check_server_connection():
            console.print(f"[red]❌ Cannot connect to server at {self.server_url}[/red]")
            console.print("[yellow]💡 Make sure the model server is running with:[/yellow]")
            console.print("[cyan]python model_server.py[/cyan]")
            return

        console.print("[green]✓[/green] Connected to model server!")

        # Main interaction loop
        while True:
            try:
                # Get user input
                user_input = Prompt.ask(
                    f"[bold blue]You[/bold blue] ({self.current_language})"
                ).strip()

                if not user_input:
                    continue

                # Handle commands
                if user_input.startswith('/'):
                    self.handle_command(user_input)
                    continue

                # Show spinner while the (blocking) API call runs.
                with console.status("[bold green]AI is thinking...[/bold green]"):
                    start_time = time.time()
                    response_data = self.send_message(user_input)
                    end_time = time.time()
                    response_time = end_time - start_time

                # Display response
                if response_data and "choices" in response_data:
                    response = response_data["choices"][0]["message"]["content"]

                    console.print(f"\n[dim]Response time: {response_time:.2f}s[/dim]")
                    console.print(f"[bold green]AI:[/bold green]")

                    self.format_response(response)

                    # Adopt the server's authoritative history (it appends
                    # both new turns); keep ours on error responses.
                    self.history = response_data.get("history", self.history)

                    console.print()  # Add spacing
                else:
                    console.print("[red]❌ Invalid response from server[/red]")

            except KeyboardInterrupt:
                console.print("\n[yellow]Interrupted by user[/yellow]")
                if Confirm.ask("Exit the program?"):
                    break
            except Exception as e:
                console.print(f"[red]❌ Error: {str(e)}[/red]")
|
| 281 |
+
|
| 282 |
+
def main():
    """CLI entry point: pick the server URL and run the terminal chat loop."""
    # The first positional argument, when present, overrides the default
    # local model-server address.
    server_url = sys.argv[1] if len(sys.argv) > 1 else "http://localhost:8000"

    console.print("[cyan]AI Coding Assistant Terminal[/cyan]")
    console.print(f"[dim]Server: {server_url}[/dim]\n")

    TerminalChatbot(server_url).run()


if __name__ == "__main__":
    main()
|
updated_app.py
ADDED
|
@@ -0,0 +1,271 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import requests
|
| 3 |
+
import json
|
| 4 |
+
from typing import List, Dict, Any, Optional
|
| 5 |
+
import os
|
| 6 |
+
import time
|
| 7 |
+
|
| 8 |
+
# Configuration
|
| 9 |
+
API_BASE_URL = os.getenv("API_BASE_URL", "http://localhost:8000")
|
| 10 |
+
API_HEALTH_URL = f"{API_BASE_URL}/health"
|
| 11 |
+
API_CHAT_URL = f"{API_BASE_URL}/api/chat"
|
| 12 |
+
API_INFO_URL = f"{API_BASE_URL}/model/info"
|
| 13 |
+
|
| 14 |
+
def check_api_connection() -> Dict[str, Any]:
    """Probe the model server's health endpoint.

    Returns the server's health JSON on HTTP 200; otherwise a dict with
    ``status: "error"`` and a human-readable ``message``. Never raises.
    """
    try:
        resp = requests.get(API_HEALTH_URL, timeout=5)
        if resp.status_code == 200:
            return resp.json()
        return {"status": "error", "message": f"API returned status {resp.status_code}"}
    except requests.exceptions.RequestException as exc:
        # Connection refused, DNS failure, timeout, bad JSON, etc.
        return {"status": "error", "message": f"Connection failed: {str(exc)}"}
|
| 24 |
+
|
| 25 |
+
def chat_with_api(message: str, history: List[Dict[str, str]], language: str = "python", temperature: float = 0.7) -> Dict[str, Any]:
    """Send one chat turn to the model API and return its JSON response.

    On any failure (server unhealthy, HTTP error, network error), returns an
    OpenAI-style payload whose single choice carries the error text, with the
    caller's history passed back unchanged. Never raises.
    """

    def _error_reply(text: str) -> Dict[str, Any]:
        # Shape errors like a normal completion so the UI renders them inline.
        return {"choices": [{"message": {"content": text}}], "history": history}

    try:
        # Fail fast with a helpful hint when the model server is down.
        health_status = check_api_connection()
        if health_status.get("status") != "healthy":
            return _error_reply(
                f"❌ API Server Error: {health_status.get('message', 'Unknown error')}\n\n💡 Make sure the model server is running:\n```bash\npython model_server.py\n```"
            )

        resp = requests.post(
            API_CHAT_URL,
            json={
                "message": message,
                "history": history,
                "language": language,
                "temperature": temperature,
            },
            headers={"Content-Type": "application/json"},
            timeout=60,
        )

        if resp.status_code == 200:
            return resp.json()
        return _error_reply(f"API Error: {resp.status_code} - {resp.text}")

    except requests.exceptions.RequestException as e:
        return _error_reply(f"Connection error: {str(e)}")
    except Exception as e:
        return _error_reply(f"Error: {str(e)}")
|
| 68 |
+
|
| 69 |
+
def get_model_info_api() -> Dict[str, Any]:
    """Fetch model metadata from the API server.

    Returns the server's info JSON, or ``{"error": ...}`` on any failure.
    """
    try:
        resp = requests.get(API_INFO_URL, timeout=5)
        if resp.status_code != 200:
            return {"error": f"Failed to get model info: {resp.status_code}"}
        return resp.json()
    except Exception as e:
        return {"error": f"Failed to get model info: {str(e)}"}
|
| 79 |
+
|
| 80 |
+
def create_demo():
    """Build and return the Gradio Blocks UI for the API-backed chatbot.

    The UI talks to the model server only through the module-level helpers
    (check_api_connection / chat_with_api / get_model_info_api), so it can be
    built even while the server is still loading the model.
    """

    with gr.Blocks(
        # FIX: the original also passed description=..., but gr.Blocks does not
        # accept a `description` kwarg (that parameter belongs to gr.Interface)
        # and it raised TypeError at startup. The page blurb lives in the
        # gr.HTML header below instead.
        title="AI Coder - 5B Parameter Chatbot (API)",
        theme=gr.themes.Soft(),
        css="""
        .container {max-width: 1200px !important;}
        .header {text-align: center; padding: 20px;}
        .header h1 {color: #2d3748; margin-bottom: 10px;}
        .header a {color: #3182ce; text-decoration: none; font-weight: bold;}
        .header a:hover {text-decoration: underline;}
        .status-indicator {padding: 10px; border-radius: 5px; margin: 10px 0;}
        .status-online {background-color: #d4edda; color: #155724;}
        .status-offline {background-color: #f8d7da; color: #721c24;}
        .coding-section {background: #f7fafc; border-radius: 8px; padding: 15px; margin: 10px 0;}
        """
    ) as demo:

        # Header
        gr.HTML("""
        <div class="header">
            <h1>🤖 AI Coder - API Client</h1>
            <p>AI chatbot with coding features powered by a 5B parameter model via API</p>
            <p>Built with <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank">anycoder</a></p>
        </div>
        """)

        # Online/offline banner, filled once at build time (see bottom).
        status_display = gr.HTML()

        def update_status():
            """Return an HTML banner reflecting the server's current health."""
            status = check_api_connection()
            if status.get("status") == "healthy":
                return f"""
                <div class="status-indicator status-online">
                    ✅ API Server: Online - Model: {status.get('model_name', 'Unknown')}
                </div>
                """
            else:
                return f"""
                <div class="status-indicator status-offline">
                    ❌ API Server: Offline - {status.get('message', 'Unknown error')}
                </div>
                """

        # Main chat interface
        with gr.Row():
            # Left column - Chat
            with gr.Column(scale=3):
                chatbot = gr.Chatbot(
                    label="AI Coding Assistant",
                    height=600,
                    type="messages",
                    # NOTE(review): recent Gradio versions expect a filepath/URL
                    # for avatar images — confirm the emoji string is accepted.
                    avatar_images=(None, "🤖"),
                    show_copy_button=True
                )

                with gr.Row():
                    msg = gr.Textbox(
                        placeholder="Ask me to code something, debug code, or explain programming concepts...",
                        lines=3,
                        scale=4
                    )
                    send_btn = gr.Button("Send", variant="primary", scale=1)

                with gr.Row():
                    clear_btn = gr.Button("Clear Chat", variant="secondary")

            # Right column - Controls
            with gr.Column(scale=1):
                gr.Markdown("### 🛠️ Settings")

                language = gr.Dropdown(
                    choices=[
                        "python", "javascript", "java", "cpp", "c", "go",
                        "rust", "typescript", "php", "ruby", "swift", "kotlin",
                        "sql", "html", "css", "bash", "powershell"
                    ],
                    value="python",
                    label="Programming Language"
                )

                temperature = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.7,
                    step=0.1,
                    label="Creativity (Temperature)"
                )

                # API Status info
                with gr.Accordion("🔗 API Status", open=True):
                    status_text = gr.Markdown()

                with gr.Accordion("🎯 Quick Prompts", open=False):
                    gr.Examples(
                        examples=[
                            "Write a Python function to reverse a linked list",
                            "Create a React component for a login form",
                            "Debug this JavaScript code: [paste code]",
                            "Explain Big O notation with examples",
                            "Create a binary search algorithm in C++"
                        ],
                        inputs=msg,
                        examples_per_page=3
                    )

                with gr.Accordion("ℹ️ API Info", open=False):
                    api_info = gr.Markdown()

        def get_api_info():
            """Return a Markdown summary of the served model, or an error line."""
            info = get_model_info_api()
            if "error" not in info:
                return f"""
                **Model:** {info.get('model_name', 'Unknown')}
                **Parameters:** {info.get('parameter_count', 'Unknown')}
                **Max Length:** {info.get('max_length', 'Unknown'):,} tokens
                **Device:** {info.get('device', 'Unknown')}
                **Status:** {'✅ Loaded' if info.get('is_loaded') else '⏳ Loading...'}
                **Vocab Size:** {info.get('vocab_size', 'Unknown'):,}
                """
            else:
                return f"❌ {info['error']}"

        # Queried once at build time; requires the API server to be reachable.
        api_info.value = get_api_info()

        # Event handlers
        def user(user_message, history):
            # Append the user's turn and clear the textbox.
            return "", history + [{"role": "user", "content": user_message}]

        def bot(history, selected_language, temp):
            # Send the newest user turn to the API with the prior turns as context.
            if not history:
                return history
            last_message = history[-1]["content"]
            result = chat_with_api(last_message, history[:-1], selected_language, temp)
            return result["history"]

        # Wire up events: Enter key and Send button follow the same pipeline.
        msg.submit(
            user,
            [msg, chatbot],
            [msg, chatbot],
            queue=False
        ).then(
            bot,
            [chatbot, language, temperature],
            chatbot
        )

        send_btn.click(
            user,
            [msg, chatbot],
            [msg, chatbot],
            queue=False
        ).then(
            bot,
            [chatbot, language, temperature],
            chatbot
        )

        clear_btn.click(
            lambda: [{"role": "assistant", "content": "Hello! I'm your AI coding assistant. I can help you with Python, JavaScript, Java, C++, and many other programming languages. What would you like to code today?"}],
            outputs=[chatbot]
        )

        # Helper for refreshing both status widgets (e.g. from a gr.Timer);
        # currently not wired to any event.
        def update_all_status():
            status_html = update_status()
            api_info_text = get_api_info()
            return status_html, api_info_text

        # Initial status update
        status_display.value = update_status()

        # Load initial greeting message
        chatbot.value = [{"role": "assistant", "content": "Hello! I'm your AI coding assistant powered by a 5B parameter language model via API. I can help you with Python, JavaScript, Java, C++, and many other programming languages. What would you like to code today?"}]

    return demo
|
| 261 |
+
|
| 262 |
+
if __name__ == "__main__":
    # Build the UI and serve it on all interfaces (HF Spaces expects port 7860).
    demo = create_demo()
    launch_options = dict(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
        share=False,
        debug=True,
        # NOTE(review): mcp_server=True requires the `gradio[mcp]` extra —
        # confirm it is installed in this environment.
        mcp_server=True,
    )
    demo.launch(**launch_options)
|
updated_models.py
ADDED
|
@@ -0,0 +1,213 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
| 3 |
+
from typing import List, Dict, Any, Optional
|
| 4 |
+
import logging
|
| 5 |
+
import asyncio
|
| 6 |
+
import threading
|
| 7 |
+
|
| 8 |
+
logger = logging.getLogger(__name__)
|
| 9 |
+
|
| 10 |
+
class CodeModel:
    """Wrapper around a large causal-LM coding model with thread-safe inference.

    Attempts to load bigcode/starcoder2-7b; on failure falls back to
    microsoft/DialoGPT-medium. Generation is serialized with a lock and never
    raises to callers — errors come back as apologetic strings.
    """

    def __init__(self):
        # NOTE: despite the "5B" naming elsewhere in the project, this is a
        # 7B checkpoint.
        self.model_name = "bigcode/starcoder2-7b"  # 7B model (closest to 5B with excellent coding)
        self.parameter_count = "7B"
        self.max_length = 16384        # context size advertised via get_model_info()
        self.tokenizer = None
        self.model = None
        self.pipeline = None
        self.is_loaded = False         # flipped True only after a successful load
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self._lock = threading.Lock()  # serializes generate() across threads

    # FIX: the original decorated this method with @spaces.GPU(duration=1200),
    # but the `spaces` module is never imported in this file, so evaluating the
    # decorator raised NameError at import time (the class could not even be
    # defined). If this runs on HF Spaces ZeroGPU, add `import spaces` at the
    # top of the file and restore the decorator.
    def load_model(self):
        """Load tokenizer, model, and generation pipeline; fall back on error."""
        try:
            logger.info(f"Loading {self.model_name} model...")

            self.tokenizer = AutoTokenizer.from_pretrained(
                self.model_name,
                trust_remote_code=True,
                padding_side="left"
            )
            # Some checkpoints ship without a pad token; reuse EOS so batched
            # generation does not crash.
            if self.tokenizer.pad_token is None:
                self.tokenizer.pad_token = self.tokenizer.eos_token

            self.model = AutoModelForCausalLM.from_pretrained(
                self.model_name,
                torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
                device_map="auto" if self.device == "cuda" else None,
                trust_remote_code=True,
                low_cpu_mem_usage=True,
                use_cache=True
            )
            self.model.eval()

            # Pipeline wraps tokenize → generate → decode for convenience.
            self.pipeline = pipeline(
                "text-generation",
                model=self.model,
                tokenizer=self.tokenizer,
                device=0 if self.device == "cuda" else -1,
                do_sample=True,
                temperature=0.7,
                top_p=0.95,
                repetition_penalty=1.1,
                max_new_tokens=2048,
                pad_token_id=self.tokenizer.eos_token_id
            )

            self.is_loaded = True
            logger.info(f"✅ {self.model_name} loaded successfully on {self.device}")

        except Exception as e:
            logger.error(f"❌ Error loading model: {e}")
            self._fallback_model()

    def _fallback_model(self):
        """Fallback to a smaller model if the main model fails to load."""
        try:
            logger.info("Trying fallback model: microsoft/DialoGPT-medium")
            # Overwrite the advertised metadata so get_model_info() stays honest.
            self.model_name = "microsoft/DialoGPT-medium"
            self.parameter_count = "345M"
            self.max_length = 1024

            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
            if self.tokenizer.pad_token is None:
                self.tokenizer.pad_token = self.tokenizer.eos_token

            self.model = AutoModelForCausalLM.from_pretrained(
                self.model_name,
                torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
                device_map="auto" if self.device == "cuda" else None
            )

            self.pipeline = pipeline(
                "text-generation",
                model=self.model,
                tokenizer=self.tokenizer,
                device=0 if self.device == "cuda" else -1,
                max_new_tokens=512,
                pad_token_id=self.tokenizer.eos_token_id
            )

            self.is_loaded = True
            logger.info(f"✅ Fallback model loaded successfully")

        except Exception as e:
            logger.error(f"❌ Fallback model also failed: {e}")
            self.is_loaded = False

    def generate(
        self,
        messages: List[Dict[str, str]],
        temperature: float = 0.7,
        max_new_tokens: int = 2048,
        language: str = "python"
    ) -> str:
        """Generate an assistant reply for chat-format ``messages``.

        Args:
            messages: list of {"role": ..., "content": ...} dicts.
            temperature: sampling temperature.
            max_new_tokens: generation budget.
            language: hint appended to coding prompts.

        Returns:
            The generated text, or a human-readable error string if the model
            is not loaded or generation fails (callers never see an exception).
        """

        if not self.is_loaded:
            return "I'm sorry, the model is not loaded yet. Please try again in a moment."

        try:
            with self._lock:  # one generation at a time; model is not re-entrant
                # FIX: `conversation` was previously only assigned inside
                # `if messages:`, so an empty message list raised NameError
                # when the prompt was used below.
                conversation = ""
                if messages:
                    # Flatten the chat into a plain-text transcript.
                    for msg in messages:
                        role = msg["role"]
                        content = msg["content"]
                        if role == "system":
                            conversation += f"System: {content}\n\n"
                        elif role == "user":
                            conversation += f"Human: {content}\n"
                        elif role == "assistant":
                            conversation += f"Assistant: {content}\n"

                    # Nudge the model toward quality output on coding requests.
                    if "write" in conversation.lower() or "code" in conversation.lower():
                        conversation += f"\n\nPlease provide clean, well-commented {language} code with proper syntax and best practices."

                    conversation += "\nAssistant:"

                with torch.no_grad():
                    if self.pipeline:
                        outputs = self.pipeline(
                            conversation,
                            do_sample=True,
                            temperature=temperature,
                            top_p=0.95,
                            repetition_penalty=1.1,
                            max_new_tokens=max_new_tokens,
                            pad_token_id=self.tokenizer.eos_token_id,
                            eos_token_id=self.tokenizer.eos_token_id,
                            return_full_text=False,  # keep only newly generated text
                            clean_up_tokenization_spaces=True
                        )
                        if outputs and len(outputs) > 0:
                            return outputs[0]["generated_text"].strip()

                # Fallback: drive model.generate directly when no pipeline exists.
                inputs = self.tokenizer.encode(conversation, return_tensors="pt").to(self.device)

                with torch.no_grad():
                    outputs = self.model.generate(
                        inputs,
                        do_sample=True,
                        temperature=temperature,
                        top_p=0.95,
                        repetition_penalty=1.1,
                        max_new_tokens=max_new_tokens,
                        pad_token_id=self.tokenizer.eos_token_id,
                        eos_token_id=self.tokenizer.eos_token_id,
                        attention_mask=torch.ones_like(inputs)
                    )

                # Decode only the continuation, skipping the prompt tokens.
                response = self.tokenizer.decode(outputs[0][inputs.shape[1]:], skip_special_tokens=True)
                return response.strip()

        except Exception as e:
            logger.error(f"Generation error: {e}")
            return f"I apologize, but I encountered an error while generating the response: {str(e)}"

    def get_model_info(self) -> Dict[str, Any]:
        """Get information about the loaded model."""
        return {
            "model_name": self.model_name,
            "parameter_count": self.parameter_count,
            "max_length": self.max_length,
            "device": self.device,
            "is_loaded": self.is_loaded,
            "vocab_size": len(self.tokenizer) if self.tokenizer else 0
        }
|
| 198 |
+
|
| 199 |
+
# Lazily-created singleton shared by the server process.
_global_model = None

def get_model():
    """Return the process-wide CodeModel, creating and loading it on first use."""
    global _global_model
    if _global_model is not None:
        return _global_model
    _global_model = CodeModel()
    # Kick off the slow weight load in the background so callers get an
    # immediately usable handle; is_loaded flips once loading completes.
    threading.Thread(target=_global_model.load_model, daemon=True).start()
    return _global_model
|
| 210 |
+
|
| 211 |
+
# FIX: a `def CodeModel()` "factory" previously defined here shadowed the
# CodeModel class above and its body was `return CodeModel()` — i.e. it
# called itself, recursing infinitely on first use and breaking get_model().
# It has been removed: callers that invoke `CodeModel()` now construct the
# class directly, which is what the factory intended.
|