Nhughes09 commited on
Commit
b438512
·
1 Parent(s): e54f4cc

Add Ollama client with modular architecture for local AI

Browse files
__pycache__/app.cpython-314.pyc ADDED
Binary file (6.73 kB). View file
 
__pycache__/ollama_client.cpython-314.pyc ADDED
Binary file (9.34 kB). View file
 
app.py CHANGED
@@ -1,8 +1,8 @@
1
- # app.py - Main Gradio Application
2
  import gradio as gr
 
3
  from logging_config import setup_logging, log_banner, log_section, log_startup_info
4
- from cloudflare_client import CloudflareAIClient
5
- import json
6
 
7
  # ============================================================================
8
  # INITIALIZATION
@@ -11,71 +11,59 @@ logger = setup_logging()
11
  log_startup_info(logger)
12
 
13
  # ============================================================================
14
- # CLOUDFLARE CONFIGURATION
15
  # ============================================================================
16
- # Your Cloudflare Worker endpoint - update this when deployed!
17
- CLOUDFLARE_ENDPOINT = "https://cloudflarellamaworker.nlhughes08.workers.dev"
 
 
 
 
 
18
 
19
- log_section(logger, "CLOUDFLARE AI CLIENT SETUP")
20
- ai_client = CloudflareAIClient(logger, CLOUDFLARE_ENDPOINT)
21
 
22
- # Test connection on startup
23
- logger.info("Testing Cloudflare endpoint on startup...")
24
- connection_ok = ai_client.test_connection()
25
- if connection_ok:
26
- logger.info("Cloudflare endpoint is REACHABLE")
 
 
 
 
 
 
 
27
  else:
28
- logger.warning("Cloudflare endpoint may not be ready - check deployment")
29
 
30
  # ============================================================================
31
  # CHAT RESPONSE FUNCTION
32
  # ============================================================================
33
  def respond(message, history):
34
- """Generate AI response using Cloudflare Workers AI."""
35
  log_section(logger, "NEW USER MESSAGE")
36
  logger.info(f"User: {message}")
37
  logger.info(f"History: {len(history)} previous messages")
38
 
39
- # Build context from history (simple approach)
40
- context = ""
41
- if history:
42
- for user_msg, bot_msg in history[-5:]: # Last 5 exchanges
43
- context += f"User: {user_msg}\n"
44
- if bot_msg:
45
- context += f"Assistant: {bot_msg}\n"
46
-
47
- # Full prompt with context
48
- if context:
49
- full_prompt = f"{context}User: {message}\nAssistant:"
50
- else:
51
- full_prompt = message
52
 
53
- logger.debug(f"Full prompt length: {len(full_prompt)} chars")
54
-
55
- # Call Cloudflare AI
56
- result = ai_client.call_ai(full_prompt)
57
 
58
  if result["success"]:
59
- response_text = result["response"]
60
- # Clean up any model artifacts like <|start_header_id|>
61
- for tag in ["<|start_header_id|>", "<|end_header_id|>", "<|eot_id|>"]:
62
- response_text = response_text.replace(tag, "")
63
- return response_text.strip()
64
  else:
65
- # Return error with diagnostic info
66
  error_msg = result.get("error", "Unknown error")
67
- diagnosis = result.get("diagnosis", {})
68
-
69
- error_response = f"Error: {error_msg}\n\n"
70
-
71
- if diagnosis.get("suggestions"):
72
- error_response += "Troubleshooting:\n"
73
- for suggestion in diagnosis["suggestions"]:
74
- error_response += f"• {suggestion}\n"
75
-
76
- error_response += f"\nEndpoint: {CLOUDFLARE_ENDPOINT}"
77
-
78
- return error_response
79
 
80
  # ============================================================================
81
  # GRADIO UI
@@ -84,8 +72,8 @@ log_section(logger, "BUILDING GRADIO UI")
84
 
85
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
86
  gr.Markdown("# CPU Chatbot")
87
- gr.Markdown("### Powered by Cloudflare Workers AI (Llama Guard 3)")
88
- gr.Markdown(f"**Endpoint:** `{CLOUDFLARE_ENDPOINT}`")
89
 
90
  with gr.Row():
91
  with gr.Column(scale=4):
@@ -98,26 +86,37 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
98
  with gr.Column(scale=1):
99
  gr.Markdown("### Status")
100
  status_box = gr.JSON(
101
- label="Client Stats",
102
- value=ai_client.get_stats()
103
  )
104
- refresh_btn = gr.Button("Refresh Stats")
 
 
 
 
 
105
 
106
  def user_submit(message, history):
107
  if not message.strip():
108
- return "", history, ai_client.get_stats()
109
- return "", history + [[message, None]], ai_client.get_stats()
110
 
111
  def bot_respond(history):
112
  if not history:
113
- return history, ai_client.get_stats()
114
  user_message = history[-1][0]
115
  bot_response = respond(user_message, history[:-1])
116
  history[-1][1] = bot_response
117
- return history, ai_client.get_stats()
 
 
 
 
 
118
 
119
  def refresh_stats():
120
- return ai_client.get_stats()
 
121
 
122
  msg.submit(user_submit, [msg, chatbot], [msg, chatbot, status_box], queue=False).then(
123
  bot_respond, chatbot, [chatbot, status_box]
@@ -125,9 +124,12 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
125
  submit_btn.click(user_submit, [msg, chatbot], [msg, chatbot, status_box], queue=False).then(
126
  bot_respond, chatbot, [chatbot, status_box]
127
  )
 
128
  refresh_btn.click(refresh_stats, outputs=status_box)
129
 
130
- log_banner(logger, "SYSTEM READY - WAITING FOR MESSAGES")
 
 
131
 
132
  if __name__ == "__main__":
133
  demo.launch()
 
1
# app.py - Main Gradio Application with Ollama Backend
import gradio as gr
import os
from logging_config import setup_logging, log_banner, log_section, log_startup_info
from ollama_client import OllamaClient

# ============================================================================
# INITIALIZATION
# ============================================================================
logger = setup_logging()
log_startup_info(logger)

# ============================================================================
# OLLAMA CONFIGURATION
# ============================================================================
# Models to try in order of preference (smaller = faster, more reliable)
MODELS = [
    "llama3.2:3b",                        # Fast, small
    "gemma3:1b",                          # Very fast, tiny
    "phi3:mini",                          # Good quality, medium
    "deepseek-coder:6.7b-instruct-q6_K",  # Good for code
]

log_section(logger, "OLLAMA CLIENT SETUP")
ollama = OllamaClient(logger, model=MODELS[0])

# Check connection and find a model that is actually installed.
logger.info("Checking Ollama connection...")
if ollama.check_connection():
    logger.info("Ollama is running!")
    available = ollama.list_models()

    # Find first available preferred model.
    for model in MODELS:
        if model in available:
            ollama.model = model
            logger.info(f"Selected model: {model}")
            break
    else:
        # Fix: previously the client silently kept MODELS[0] even when it was
        # not installed, so every chat request would fail. Fall back to any
        # installed model instead.
        if available:
            ollama.model = available[0]
            logger.warning(f"No preferred model installed - falling back to {available[0]}")
        else:
            logger.warning("Ollama is running but has no models - try 'ollama pull llama3.2:3b'")
else:
    logger.warning("Ollama not available - running in limited mode")
41
 
42
  # ============================================================================
43
  # CHAT RESPONSE FUNCTION
44
  # ============================================================================
45
def respond(message, history):
    """Produce the assistant's reply for *message* via the local Ollama backend.

    *history* is Gradio's list of [user, assistant] pairs; only the last five
    exchanges are forwarded as context. Returns the reply text on success, or
    a human-readable error string when the Ollama call fails.
    """
    log_section(logger, "NEW USER MESSAGE")
    logger.info(f"User: {message}")
    logger.info(f"History: {len(history)} previous messages")

    # Assemble an OpenAI-style message list: system prompt, the most recent
    # exchanges, then the new user message.
    convo = [{"role": "system", "content": "You are a helpful AI assistant. Be concise and helpful."}]
    for past_user, past_bot in history[-5:]:  # Last 5 exchanges for context
        convo.append({"role": "user", "content": past_user})
        if past_bot:
            convo.append({"role": "assistant", "content": past_bot})
    convo.append({"role": "user", "content": message})

    # Hand the conversation to Ollama.
    result = ollama.chat(convo)

    if not result["success"]:
        error_msg = result.get("error", "Unknown error")
        return f"Error: {error_msg}\n\nMake sure Ollama is running: `ollama serve`"
    return result["response"].strip()
 
 
 
 
 
 
 
 
 
 
 
67
 
68
  # ============================================================================
69
  # GRADIO UI
 
72
 
73
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
74
  gr.Markdown("# CPU Chatbot")
75
+ gr.Markdown(f"### Powered by Ollama ({ollama.model})")
76
+ gr.Markdown("*Using local AI - no cloud required!*")
77
 
78
  with gr.Row():
79
  with gr.Column(scale=4):
 
86
  with gr.Column(scale=1):
87
  gr.Markdown("### Status")
88
  status_box = gr.JSON(
89
+ label="Ollama Stats",
90
+ value=ollama.get_stats()
91
  )
92
+ model_dropdown = gr.Dropdown(
93
+ choices=ollama.available_models or MODELS,
94
+ value=ollama.model,
95
+ label="Model"
96
+ )
97
+ refresh_btn = gr.Button("Refresh")
98
 
99
  def user_submit(message, history):
100
  if not message.strip():
101
+ return "", history, ollama.get_stats()
102
+ return "", history + [[message, None]], ollama.get_stats()
103
 
104
  def bot_respond(history):
105
  if not history:
106
+ return history, ollama.get_stats()
107
  user_message = history[-1][0]
108
  bot_response = respond(user_message, history[:-1])
109
  history[-1][1] = bot_response
110
+ return history, ollama.get_stats()
111
+
112
+ def change_model(model):
113
+ ollama.model = model
114
+ logger.info(f"Switched to model: {model}")
115
+ return ollama.get_stats()
116
 
117
  def refresh_stats():
118
+ ollama.check_connection()
119
+ return ollama.get_stats()
120
 
121
  msg.submit(user_submit, [msg, chatbot], [msg, chatbot, status_box], queue=False).then(
122
  bot_respond, chatbot, [chatbot, status_box]
 
124
  submit_btn.click(user_submit, [msg, chatbot], [msg, chatbot, status_box], queue=False).then(
125
  bot_respond, chatbot, [chatbot, status_box]
126
  )
127
+ model_dropdown.change(change_model, model_dropdown, status_box)
128
  refresh_btn.click(refresh_stats, outputs=status_box)
129
 
130
+ log_banner(logger, "SYSTEM READY - USING OLLAMA")
131
+ logger.info(f"Model: {ollama.model}")
132
+ logger.info("Run 'ollama serve' if not already running")
133
 
134
  if __name__ == "__main__":
135
  demo.launch()
ollama_client.py ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# ollama_client.py - Ollama API Client
import requests
from datetime import datetime


class OllamaClient:
    """Client for a local Ollama HTTP API.

    Tracks simple request statistics for the UI and exposes:
      - check_connection() / list_models() for server discovery,
      - generate() for raw prompt completion via /api/generate,
      - chat() for an OpenAI-style message list (flattened to a prompt).
    """

    def __init__(self, logger, base_url="http://localhost:11434", model="llama3.2:3b"):
        self.logger = logger
        self.base_url = base_url
        self.model = model
        # Rolling stats, surfaced via get_stats() in the UI status panel.
        self.request_count = 0
        self.success_count = 0
        self.error_count = 0
        self.last_error = None
        self.available_models = []

        self.logger.info("OllamaClient initialized")
        self.logger.info(f"Base URL: {base_url}")
        self.logger.info(f"Default Model: {model}")

    def get_stats(self):
        """Return client statistics as a JSON-serializable dict."""
        return {
            "requests": self.request_count,
            "successes": self.success_count,
            "errors": self.error_count,
            "model": self.model,
            "last_error": self.last_error
        }

    def _fetch_model_names(self):
        """Fetch model names from /api/tags.

        Returns a list of names on success, or None when Ollama is unreachable
        or answers with a non-200 status. (An empty list means the server
        responded but has no models installed.)
        """
        try:
            response = requests.get(f"{self.base_url}/api/tags", timeout=5)
            if response.status_code == 200:
                data = response.json()
                return [m["name"] for m in data.get("models", [])]
        except (requests.RequestException, ValueError) as e:
            # Fix: was a bare `except:` in list_models, which would also
            # swallow KeyboardInterrupt/SystemExit. ValueError covers a
            # malformed JSON body.
            self.logger.warning(f"Ollama not available: {e}")
        return None

    def check_connection(self):
        """Check if Ollama is running and accessible.

        Caches the discovered models in self.available_models on success.
        """
        self.logger.info("Checking Ollama connection...")
        names = self._fetch_model_names()
        if names is None:
            return False
        self.available_models = names
        self.logger.info(f"Ollama connected! Found {len(self.available_models)} models")
        for model in self.available_models:
            self.logger.info(f"  - {model}")
        return True

    def list_models(self):
        """List available model names ([] when Ollama is unreachable)."""
        return self._fetch_model_names() or []

    def generate(self, prompt, model=None):
        """
        Generate a response from Ollama.

        Args:
            prompt: The user's message/prompt
            model: Optional model override (defaults to self.model)

        Returns:
            dict with 'success', 'response' or 'error', and 'debug_info'
        """
        self.request_count += 1
        request_id = f"OLL-{self.request_count:04d}"
        use_model = model or self.model

        self.logger.info("-" * 50)
        self.logger.info(f"[{request_id}] OLLAMA REQUEST")
        self.logger.info(f"[{request_id}] Model: {use_model}")
        self.logger.info(f"[{request_id}] Prompt: {prompt[:100]}...")

        debug_info = {
            "request_id": request_id,
            "timestamp": datetime.now().isoformat(),
            "model": use_model,
            "prompt_length": len(prompt)
        }

        payload = {
            "model": use_model,
            "prompt": prompt,
            "stream": False  # single JSON response, not a token stream
        }

        try:
            self.logger.info(f"[{request_id}] Sending to Ollama...")
            response = requests.post(
                f"{self.base_url}/api/generate",
                json=payload,
                timeout=120  # Long timeout for generation
            )

            debug_info["status_code"] = response.status_code
            self.logger.info(f"[{request_id}] Status: {response.status_code}")

            if response.status_code == 200:
                result = response.json()

                # Ollama can return HTTP 200 with an application-level error.
                if "error" in result:
                    self.error_count += 1
                    self.last_error = result["error"]
                    self.logger.error(f"[{request_id}] Ollama error: {result['error']}")
                    return {
                        "success": False,
                        "error": result["error"],
                        "debug_info": debug_info
                    }

                response_text = result.get("response", "")
                debug_info["eval_count"] = result.get("eval_count")
                # total_duration is reported in nanoseconds; convert to ms.
                debug_info["total_duration_ms"] = result.get("total_duration", 0) / 1_000_000

                self.success_count += 1
                self.logger.info(f"[{request_id}] SUCCESS")
                self.logger.info(f"[{request_id}] Response: {response_text[:100]}...")
                self.logger.info(f"[{request_id}] Duration: {debug_info['total_duration_ms']:.0f}ms")

                return {
                    "success": True,
                    "response": response_text,
                    "debug_info": debug_info
                }
            else:
                self.error_count += 1
                error_msg = f"HTTP {response.status_code}: {response.text[:200]}"
                self.last_error = error_msg
                self.logger.error(f"[{request_id}] Error: {error_msg}")
                return {
                    "success": False,
                    "error": error_msg,
                    "debug_info": debug_info
                }

        except requests.exceptions.ConnectionError:
            self.error_count += 1
            self.last_error = "Cannot connect to Ollama"
            self.logger.error(f"[{request_id}] Cannot connect to Ollama at {self.base_url}")
            return {
                "success": False,
                "error": "Cannot connect to Ollama. Is it running?",
                "debug_info": debug_info
            }
        except requests.exceptions.Timeout:
            self.error_count += 1
            self.last_error = "Timeout"
            self.logger.error(f"[{request_id}] Request timed out")
            return {
                "success": False,
                "error": "Request timed out after 120 seconds",
                "debug_info": debug_info
            }
        except Exception as e:
            # Last-resort boundary: report the failure to the caller as a
            # structured error rather than crashing the UI callback.
            self.error_count += 1
            self.last_error = str(e)
            self.logger.error(f"[{request_id}] Exception: {e}")
            return {
                "success": False,
                "error": str(e),
                "debug_info": debug_info
            }

    def chat(self, messages, model=None):
        """
        Chat with Ollama using message history (OpenAI-like format).

        NOTE(review): this flattens messages into a plain prompt for
        /api/generate rather than calling Ollama's native /api/chat endpoint;
        switching would change server-side templating behavior, so it is
        left as-is.

        Args:
            messages: List of {"role": "system/user/assistant", "content": "..."}
            model: Optional model override

        Returns:
            dict with 'success', 'response' or 'error', and 'debug_info'
        """
        # Convert messages to a single role-tagged prompt.
        prompt = ""
        for msg in messages:
            role = msg.get("role", "user")
            content = msg.get("content", "")
            if role == "system":
                prompt += f"System: {content}\n"
            elif role == "user":
                prompt += f"User: {content}\n"
            elif role == "assistant":
                prompt += f"Assistant: {content}\n"
        prompt += "Assistant:"

        return self.generate(prompt, model)