Nhughes09 commited on
Commit
b438512
·
1 Parent(s): e54f4cc

Add Ollama client with modular architecture for local AI

Browse files
__pycache__/app.cpython-314.pyc ADDED
Binary file (6.73 kB). View file
 
__pycache__/ollama_client.cpython-314.pyc ADDED
Binary file (9.34 kB). View file
 
app.py CHANGED
@@ -1,8 +1,8 @@
1
- # app.py - Main Gradio Application
2
  import gradio as gr
 
3
  from logging_config import setup_logging, log_banner, log_section, log_startup_info
4
- from cloudflare_client import CloudflareAIClient
5
- import json
6
 
7
  # ============================================================================
8
  # INITIALIZATION
@@ -11,71 +11,59 @@ logger = setup_logging()
11
  log_startup_info(logger)
12
 
13
  # ============================================================================
14
- # CLOUDFLARE CONFIGURATION
15
  # ============================================================================
16
- # Your Cloudflare Worker endpoint - update this when deployed!
17
- CLOUDFLARE_ENDPOINT = "https://cloudflarellamaworker.nlhughes08.workers.dev"
 
 
 
 
 
18
 
19
- log_section(logger, "CLOUDFLARE AI CLIENT SETUP")
20
- ai_client = CloudflareAIClient(logger, CLOUDFLARE_ENDPOINT)
21
 
22
- # Test connection on startup
23
- logger.info("Testing Cloudflare endpoint on startup...")
24
- connection_ok = ai_client.test_connection()
25
- if connection_ok:
26
- logger.info("Cloudflare endpoint is REACHABLE")
 
 
 
 
 
 
 
27
  else:
28
- logger.warning("Cloudflare endpoint may not be ready - check deployment")
29
 
30
  # ============================================================================
31
  # CHAT RESPONSE FUNCTION
32
  # ============================================================================
33
  def respond(message, history):
34
- """Generate AI response using Cloudflare Workers AI."""
35
  log_section(logger, "NEW USER MESSAGE")
36
  logger.info(f"User: {message}")
37
  logger.info(f"History: {len(history)} previous messages")
38
 
39
- # Build context from history (simple approach)
40
- context = ""
41
- if history:
42
- for user_msg, bot_msg in history[-5:]: # Last 5 exchanges
43
- context += f"User: {user_msg}\n"
44
- if bot_msg:
45
- context += f"Assistant: {bot_msg}\n"
46
-
47
- # Full prompt with context
48
- if context:
49
- full_prompt = f"{context}User: {message}\nAssistant:"
50
- else:
51
- full_prompt = message
52
 
53
- logger.debug(f"Full prompt length: {len(full_prompt)} chars")
54
-
55
- # Call Cloudflare AI
56
- result = ai_client.call_ai(full_prompt)
57
 
58
  if result["success"]:
59
- response_text = result["response"]
60
- # Clean up any model artifacts like <|start_header_id|>
61
- for tag in ["<|start_header_id|>", "<|end_header_id|>", "<|eot_id|>"]:
62
- response_text = response_text.replace(tag, "")
63
- return response_text.strip()
64
  else:
65
- # Return error with diagnostic info
66
  error_msg = result.get("error", "Unknown error")
67
- diagnosis = result.get("diagnosis", {})
68
-
69
- error_response = f"Error: {error_msg}\n\n"
70
-
71
- if diagnosis.get("suggestions"):
72
- error_response += "Troubleshooting:\n"
73
- for suggestion in diagnosis["suggestions"]:
74
- error_response += f"• {suggestion}\n"
75
-
76
- error_response += f"\nEndpoint: {CLOUDFLARE_ENDPOINT}"
77
-
78
- return error_response
79
 
80
  # ============================================================================
81
  # GRADIO UI
@@ -84,8 +72,8 @@ log_section(logger, "BUILDING GRADIO UI")
84
 
85
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
86
  gr.Markdown("# CPU Chatbot")
87
- gr.Markdown("### Powered by Cloudflare Workers AI (Llama Guard 3)")
88
- gr.Markdown(f"**Endpoint:** `{CLOUDFLARE_ENDPOINT}`")
89
 
90
  with gr.Row():
91
  with gr.Column(scale=4):
@@ -98,26 +86,37 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
98
  with gr.Column(scale=1):
99
  gr.Markdown("### Status")
100
  status_box = gr.JSON(
101
- label="Client Stats",
102
- value=ai_client.get_stats()
103
  )
104
- refresh_btn = gr.Button("Refresh Stats")
 
 
 
 
 
105
 
106
  def user_submit(message, history):
107
  if not message.strip():
108
- return "", history, ai_client.get_stats()
109
- return "", history + [[message, None]], ai_client.get_stats()
110
 
111
  def bot_respond(history):
112
  if not history:
113
- return history, ai_client.get_stats()
114
  user_message = history[-1][0]
115
  bot_response = respond(user_message, history[:-1])
116
  history[-1][1] = bot_response
117
- return history, ai_client.get_stats()
 
 
 
 
 
118
 
119
  def refresh_stats():
120
- return ai_client.get_stats()
 
121
 
122
  msg.submit(user_submit, [msg, chatbot], [msg, chatbot, status_box], queue=False).then(
123
  bot_respond, chatbot, [chatbot, status_box]
@@ -125,9 +124,12 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
125
  submit_btn.click(user_submit, [msg, chatbot], [msg, chatbot, status_box], queue=False).then(
126
  bot_respond, chatbot, [chatbot, status_box]
127
  )
 
128
  refresh_btn.click(refresh_stats, outputs=status_box)
129
 
130
- log_banner(logger, "SYSTEM READY - WAITING FOR MESSAGES")
 
 
131
 
132
  if __name__ == "__main__":
133
  demo.launch()
 
1
# app.py - Main Gradio Application with Ollama Backend
import gradio as gr
import os
from logging_config import setup_logging, log_banner, log_section, log_startup_info
from ollama_client import OllamaClient

# ============================================================================
# INITIALIZATION
# ============================================================================
logger = setup_logging()
log_startup_info(logger)

# ============================================================================
# OLLAMA CONFIGURATION
# ============================================================================
# Models to try in order of preference (smaller = faster, more reliable)
MODELS = [
    "llama3.2:3b",                        # Fast, small
    "gemma3:1b",                          # Very fast, tiny
    "phi3:mini",                          # Good quality, medium
    "deepseek-coder:6.7b-instruct-q6_K",  # Good for code
]

log_section(logger, "OLLAMA CLIENT SETUP")
ollama = OllamaClient(logger, model=MODELS[0])

# Check connection and find a model that is actually installed.
logger.info("Checking Ollama connection...")
if ollama.check_connection():
    logger.info("Ollama is running!")
    available = ollama.list_models()

    # Find first available preferred model.
    for model in MODELS:
        if model in available:
            ollama.model = model
            logger.info(f"Selected model: {model}")
            break
    else:
        # Fix: previously the client silently kept MODELS[0] even when it was
        # not installed, so every chat request would fail. Fall back to any
        # installed model instead.
        if available:
            ollama.model = available[0]
            logger.warning(f"No preferred model installed - falling back to {available[0]}")
        else:
            logger.warning("Ollama is running but has no models - try 'ollama pull llama3.2:3b'")
else:
    logger.warning("Ollama not available - running in limited mode")
41
 
42
  # ============================================================================
43
  # CHAT RESPONSE FUNCTION
44
  # ============================================================================
45
def respond(message, history):
    """Produce the assistant's reply for *message* via the local Ollama backend.

    *history* is Gradio's list of [user, assistant] pairs; only the last five
    exchanges are forwarded as context. Returns the reply text on success, or
    a human-readable error string when the Ollama call fails.
    """
    log_section(logger, "NEW USER MESSAGE")
    logger.info(f"User: {message}")
    logger.info(f"History: {len(history)} previous messages")

    # Assemble an OpenAI-style message list: system prompt, the most recent
    # exchanges, then the new user message.
    convo = [{"role": "system", "content": "You are a helpful AI assistant. Be concise and helpful."}]
    for past_user, past_bot in history[-5:]:  # Last 5 exchanges for context
        convo.append({"role": "user", "content": past_user})
        if past_bot:
            convo.append({"role": "assistant", "content": past_bot})
    convo.append({"role": "user", "content": message})

    # Hand the conversation to Ollama.
    result = ollama.chat(convo)

    if not result["success"]:
        error_msg = result.get("error", "Unknown error")
        return f"Error: {error_msg}\n\nMake sure Ollama is running: `ollama serve`"
    return result["response"].strip()
 
 
 
 
 
 
 
 
 
 
 
67
 
68
  # ============================================================================
69
  # GRADIO UI
 
72
 
73
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
74
  gr.Markdown("# CPU Chatbot")
75
+ gr.Markdown(f"### Powered by Ollama ({ollama.model})")
76
+ gr.Markdown("*Using local AI - no cloud required!*")
77
 
78
  with gr.Row():
79
  with gr.Column(scale=4):
 
86
  with gr.Column(scale=1):
87
  gr.Markdown("### Status")
88
  status_box = gr.JSON(
89
+ label="Ollama Stats",
90
+ value=ollama.get_stats()
91
  )
92
+ model_dropdown = gr.Dropdown(
93
+ choices=ollama.available_models or MODELS,
94
+ value=ollama.model,
95
+ label="Model"
96
+ )
97
+ refresh_btn = gr.Button("Refresh")
98
 
99
  def user_submit(message, history):
100
  if not message.strip():
101
+ return "", history, ollama.get_stats()
102
+ return "", history + [[message, None]], ollama.get_stats()
103
 
104
  def bot_respond(history):
105
  if not history:
106
+ return history, ollama.get_stats()
107
  user_message = history[-1][0]
108
  bot_response = respond(user_message, history[:-1])
109
  history[-1][1] = bot_response
110
+ return history, ollama.get_stats()
111
+
112
+ def change_model(model):
113
+ ollama.model = model
114
+ logger.info(f"Switched to model: {model}")
115
+ return ollama.get_stats()
116
 
117
  def refresh_stats():
118
+ ollama.check_connection()
119
+ return ollama.get_stats()
120
 
121
  msg.submit(user_submit, [msg, chatbot], [msg, chatbot, status_box], queue=False).then(
122
  bot_respond, chatbot, [chatbot, status_box]
 
124
  submit_btn.click(user_submit, [msg, chatbot], [msg, chatbot, status_box], queue=False).then(
125
  bot_respond, chatbot, [chatbot, status_box]
126
  )
127
+ model_dropdown.change(change_model, model_dropdown, status_box)
128
  refresh_btn.click(refresh_stats, outputs=status_box)
129
 
130
+ log_banner(logger, "SYSTEM READY - USING OLLAMA")
131
+ logger.info(f"Model: {ollama.model}")
132
+ logger.info("Run 'ollama serve' if not already running")
133
 
134
  if __name__ == "__main__":
135
  demo.launch()
ollama_client.py ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# ollama_client.py - Ollama API Client
import requests
from datetime import datetime


class OllamaClient:
    """Client for a local Ollama HTTP API.

    Tracks simple request statistics for the UI and exposes:
      - check_connection() / list_models() for server discovery,
      - generate() for raw prompt completion via /api/generate,
      - chat() for an OpenAI-style message list (flattened to a prompt).
    """

    def __init__(self, logger, base_url="http://localhost:11434", model="llama3.2:3b"):
        self.logger = logger
        self.base_url = base_url
        self.model = model
        # Rolling stats, surfaced via get_stats() in the UI status panel.
        self.request_count = 0
        self.success_count = 0
        self.error_count = 0
        self.last_error = None
        self.available_models = []

        self.logger.info("OllamaClient initialized")
        self.logger.info(f"Base URL: {base_url}")
        self.logger.info(f"Default Model: {model}")

    def get_stats(self):
        """Return client statistics as a JSON-serializable dict."""
        return {
            "requests": self.request_count,
            "successes": self.success_count,
            "errors": self.error_count,
            "model": self.model,
            "last_error": self.last_error
        }

    def _fetch_model_names(self):
        """Fetch model names from /api/tags.

        Returns a list of names on success, or None when Ollama is unreachable
        or answers with a non-200 status. (An empty list means the server
        responded but has no models installed.)
        """
        try:
            response = requests.get(f"{self.base_url}/api/tags", timeout=5)
            if response.status_code == 200:
                data = response.json()
                return [m["name"] for m in data.get("models", [])]
        except (requests.RequestException, ValueError) as e:
            # Fix: was a bare `except:` in list_models, which would also
            # swallow KeyboardInterrupt/SystemExit. ValueError covers a
            # malformed JSON body.
            self.logger.warning(f"Ollama not available: {e}")
        return None

    def check_connection(self):
        """Check if Ollama is running and accessible.

        Caches the discovered models in self.available_models on success.
        """
        self.logger.info("Checking Ollama connection...")
        names = self._fetch_model_names()
        if names is None:
            return False
        self.available_models = names
        self.logger.info(f"Ollama connected! Found {len(self.available_models)} models")
        for model in self.available_models:
            self.logger.info(f"  - {model}")
        return True

    def list_models(self):
        """List available model names ([] when Ollama is unreachable)."""
        return self._fetch_model_names() or []

    def generate(self, prompt, model=None):
        """
        Generate a response from Ollama.

        Args:
            prompt: The user's message/prompt
            model: Optional model override (defaults to self.model)

        Returns:
            dict with 'success', 'response' or 'error', and 'debug_info'
        """
        self.request_count += 1
        request_id = f"OLL-{self.request_count:04d}"
        use_model = model or self.model

        self.logger.info("-" * 50)
        self.logger.info(f"[{request_id}] OLLAMA REQUEST")
        self.logger.info(f"[{request_id}] Model: {use_model}")
        self.logger.info(f"[{request_id}] Prompt: {prompt[:100]}...")

        debug_info = {
            "request_id": request_id,
            "timestamp": datetime.now().isoformat(),
            "model": use_model,
            "prompt_length": len(prompt)
        }

        payload = {
            "model": use_model,
            "prompt": prompt,
            "stream": False  # single JSON response, not a token stream
        }

        try:
            self.logger.info(f"[{request_id}] Sending to Ollama...")
            response = requests.post(
                f"{self.base_url}/api/generate",
                json=payload,
                timeout=120  # Long timeout for generation
            )

            debug_info["status_code"] = response.status_code
            self.logger.info(f"[{request_id}] Status: {response.status_code}")

            if response.status_code == 200:
                result = response.json()

                # Ollama can return HTTP 200 with an application-level error.
                if "error" in result:
                    self.error_count += 1
                    self.last_error = result["error"]
                    self.logger.error(f"[{request_id}] Ollama error: {result['error']}")
                    return {
                        "success": False,
                        "error": result["error"],
                        "debug_info": debug_info
                    }

                response_text = result.get("response", "")
                debug_info["eval_count"] = result.get("eval_count")
                # total_duration is reported in nanoseconds; convert to ms.
                debug_info["total_duration_ms"] = result.get("total_duration", 0) / 1_000_000

                self.success_count += 1
                self.logger.info(f"[{request_id}] SUCCESS")
                self.logger.info(f"[{request_id}] Response: {response_text[:100]}...")
                self.logger.info(f"[{request_id}] Duration: {debug_info['total_duration_ms']:.0f}ms")

                return {
                    "success": True,
                    "response": response_text,
                    "debug_info": debug_info
                }
            else:
                self.error_count += 1
                error_msg = f"HTTP {response.status_code}: {response.text[:200]}"
                self.last_error = error_msg
                self.logger.error(f"[{request_id}] Error: {error_msg}")
                return {
                    "success": False,
                    "error": error_msg,
                    "debug_info": debug_info
                }

        except requests.exceptions.ConnectionError:
            self.error_count += 1
            self.last_error = "Cannot connect to Ollama"
            self.logger.error(f"[{request_id}] Cannot connect to Ollama at {self.base_url}")
            return {
                "success": False,
                "error": "Cannot connect to Ollama. Is it running?",
                "debug_info": debug_info
            }
        except requests.exceptions.Timeout:
            self.error_count += 1
            self.last_error = "Timeout"
            self.logger.error(f"[{request_id}] Request timed out")
            return {
                "success": False,
                "error": "Request timed out after 120 seconds",
                "debug_info": debug_info
            }
        except Exception as e:
            # Last-resort boundary: report the failure to the caller as a
            # structured error rather than crashing the UI callback.
            self.error_count += 1
            self.last_error = str(e)
            self.logger.error(f"[{request_id}] Exception: {e}")
            return {
                "success": False,
                "error": str(e),
                "debug_info": debug_info
            }

    def chat(self, messages, model=None):
        """
        Chat with Ollama using message history (OpenAI-like format).

        NOTE(review): this flattens messages into a plain prompt for
        /api/generate rather than calling Ollama's native /api/chat endpoint;
        switching would change server-side templating behavior, so it is
        left as-is.

        Args:
            messages: List of {"role": "system/user/assistant", "content": "..."}
            model: Optional model override

        Returns:
            dict with 'success', 'response' or 'error', and 'debug_info'
        """
        # Convert messages to a single role-tagged prompt.
        prompt = ""
        for msg in messages:
            role = msg.get("role", "user")
            content = msg.get("content", "")
            if role == "system":
                prompt += f"System: {content}\n"
            elif role == "user":
                prompt += f"User: {content}\n"
            elif role == "assistant":
                prompt += f"Assistant: {content}\n"
        prompt += "Assistant:"

        return self.generate(prompt, model)