Rights4AI commited on
Commit
ca9b9a8
·
verified ·
1 Parent(s): b7a364e

Deploy Gradio app with multiple files

Browse files
Files changed (4) hide show
  1. app.py +398 -0
  2. models.py +234 -0
  3. requirements.txt +7 -0
  4. utils.py +224 -0
app.py ADDED
@@ -0,0 +1,398 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import json
import os
import time
from pathlib import Path
from typing import List, Dict, Optional, Generator

import gradio as gr

from models import ModelManager
from utils import (
    format_chat_history,
    format_chat_history_messages,
    get_available_models,
    parse_model_info,
)
10
+
11
class ChatbotApp:
    """Application state for the Gradio GGUF chat UI.

    Owns the ModelManager, the name of the currently loaded model, and the
    system prompt shared between the event handlers.
    """

    def __init__(self):
        self.model_manager = ModelManager()
        self.current_model = None   # file name of the loaded model, or None
        self.chat_history = []      # kept for interface compatibility; UI state lives in gr.Chatbot
        self.system_prompt = "You are a helpful assistant."

    def load_model(self, model_path: str, context_size: int = 2048, gpu_layers: int = 0) -> str:
        """Load a GGUF model and return a human-readable status string."""
        try:
            if not model_path or not os.path.exists(model_path):
                return "❌ Please select a valid model file"

            success = self.model_manager.load_model(
                model_path=model_path,
                context_size=context_size,
                gpu_layers=gpu_layers
            )

            if success:
                self.current_model = Path(model_path).name
                return f"✅ Successfully loaded: {self.current_model}"
            else:
                return "❌ Failed to load model"

        except Exception as e:
            return f"❌ Error loading model: {str(e)}"

    def unload_model(self) -> str:
        """Unload the current model and return a status string."""
        self.model_manager.unload_model()
        self.current_model = None
        return "✅ Model unloaded"

    def chat_response(
        self,
        message: str,
        history: List[Dict[str, str]],
        temperature: float,
        max_tokens: int,
        top_p: float,
        repeat_penalty: float
    ) -> Generator[str, None, None]:
        """Stream a response to `message` given prior `history`.

        `history` is a list of {"role": ..., "content": ...} dicts (the
        gr.Chatbot "messages" format). Yields the accumulated response text
        so the UI can render it incrementally.
        """
        if not self.model_manager.is_loaded():
            yield "❌ No model loaded. Please load a model first."
            return

        try:
            # BUG FIX: history arrives in messages (dict) format, so it must
            # be rendered with format_chat_history_messages; the pair-based
            # format_chat_history would unpack each dict into its KEYS
            # ("role", "content") instead of the values. Appending the new
            # user turn before formatting also produces a prompt that ends
            # with the assistant tag, instead of gluing the raw message on.
            prompt = format_chat_history_messages(
                history + [{"role": "user", "content": message}],
                self.system_prompt,
            )

            response_text = ""
            for chunk in self.model_manager.generate(
                prompt=prompt,
                temperature=temperature,
                max_tokens=max_tokens,
                top_p=top_p,
                repeat_penalty=repeat_penalty
            ):
                response_text += chunk
                yield response_text

        except Exception as e:
            yield f"❌ Error generating response: {str(e)}"

    def clear_chat(self) -> List:
        """Clear chat history; return the empty list for the Chatbot widget."""
        self.chat_history = []
        return []

    def get_model_info(self) -> str:
        """Return a JSON string describing the loaded model, or a notice."""
        if not self.current_model:
            return "No model loaded"

        try:
            model_info = self.model_manager.get_model_info()
            if model_info:
                return json.dumps(model_info, indent=2)
            return "Model info not available"
        except Exception as e:
            return f"Error getting model info: {str(e)}"
95
+
96
def create_interface():
    """Build and wire the Gradio Blocks UI.

    Returns the `gr.Blocks` demo; every event handler closes over a single
    ChatbotApp instance that owns the model and the system prompt.
    """
    app = ChatbotApp()

    with gr.Blocks(theme=gr.themes.Soft(), title="Local GGUF Chatbot") as demo:
        gr.Markdown("""
        # 🤖 Local GGUF Chatbot
        Built with [anycoder](https://huggingface.co/spaces/akhaliq/anycoder)

        Chat with local GGUF models using llama.cpp. Load your models and start chatting!
        """)

        with gr.Tabs():
            # Chat Tab
            with gr.Tab("💬 Chat"):
                with gr.Row():
                    with gr.Column(scale=3):
                        chatbot = gr.Chatbot(
                            label="Chat",
                            height=500,
                            show_copy_button=True,
                            type="messages"
                        )

                        with gr.Row():
                            msg = gr.Textbox(
                                label="Message",
                                placeholder="Type your message here...",
                                scale=4
                            )
                            send_btn = gr.Button("Send", scale=1)
                            clear_btn = gr.Button("Clear", scale=1)

                    with gr.Column(scale=1):
                        gr.Markdown("### ⚙️ Generation Parameters")

                        temperature = gr.Slider(
                            minimum=0.1,
                            maximum=2.0,
                            value=0.7,
                            step=0.1,
                            label="Temperature"
                        )

                        max_tokens = gr.Slider(
                            minimum=1,
                            maximum=4096,
                            value=512,
                            step=1,
                            label="Max Tokens"
                        )

                        top_p = gr.Slider(
                            minimum=0.1,
                            maximum=1.0,
                            value=0.9,
                            step=0.05,
                            label="Top P"
                        )

                        repeat_penalty = gr.Slider(
                            minimum=1.0,
                            maximum=2.0,
                            value=1.1,
                            step=0.05,
                            label="Repeat Penalty"
                        )

                        system_prompt = gr.Textbox(
                            label="System Prompt",
                            value="You are a helpful assistant.",
                            lines=3
                        )

                        model_status = gr.Textbox(
                            label="Model Status",
                            value="No model loaded",
                            interactive=False
                        )

            # Model Management Tab
            with gr.Tab("📁 Models"):
                with gr.Row():
                    with gr.Column():
                        gr.Markdown("### Load Model")

                        model_file = gr.File(
                            label="Select GGUF Model",
                            file_types=[".gguf"],
                            file_count="single"
                        )

                        with gr.Row():
                            context_size = gr.Slider(
                                minimum=512,
                                maximum=8192,
                                value=2048,
                                step=512,
                                label="Context Size"
                            )

                            gpu_layers = gr.Slider(
                                minimum=0,
                                maximum=99,
                                value=0,
                                step=1,
                                label="GPU Layers"
                            )

                        load_btn = gr.Button("Load Model", variant="primary")
                        unload_btn = gr.Button("Unload Model")

                        load_status = gr.Textbox(
                            label="Load Status",
                            interactive=False
                        )

                    with gr.Column():
                        gr.Markdown("### Available Models")

                        available_models = gr.JSON(
                            label="Models Directory",
                            value=get_available_models()
                        )

                        model_info = gr.JSON(
                            label="Model Information",
                            visible=False
                        )

                        refresh_btn = gr.Button("Refresh Models")

            # Settings Tab
            with gr.Tab("⚙️ Settings"):
                gr.Markdown("### Application Settings")

                with gr.Row():
                    with gr.Column():
                        models_dir = gr.Textbox(
                            label="Models Directory",
                            value="./models",
                            placeholder="Path to models directory"
                        )

                        save_chat = gr.Checkbox(
                            label="Save Chat History",
                            value=True
                        )

                        chat_format = gr.Dropdown(
                            label="Chat Format",
                            choices=["chatml", "llama2", "alpaca", "vicuna"],
                            value="chatml"
                        )

                    with gr.Column():
                        gr.Markdown("### Model Directory Info")
                        dir_info = gr.JSON(label="Directory Info")

                        update_dir_btn = gr.Button("Update Directory")

        # ----- Event handlers -----

        def update_system_prompt(prompt):
            # BUG FIX: the handler used to return the prompt although no
            # output component is declared; just update shared state.
            app.system_prompt = prompt

        system_prompt.change(update_system_prompt, system_prompt)

        def user_message(user_input, history):
            """Append the user's message (messages format) and clear the box."""
            if not user_input.strip():
                return "", history

            history.append({"role": "user", "content": user_input})
            return "", history

        def bot_response(history, temp, max_tok, top_p_val, repeat_pen):
            """Stream the assistant's reply into the last history entry."""
            if not history:
                return history

            last_message = history[-1]["content"] if history else ""

            # Placeholder entry that is filled in as chunks stream back.
            history.append({"role": "assistant", "content": ""})

            for chunk in app.chat_response(
                last_message,
                [{"role": h["role"], "content": h["content"]} for h in history[:-2]],
                temp,
                max_tok,
                top_p_val,
                repeat_pen
            ):
                history[-1]["content"] = chunk
                yield history

        msg.submit(
            user_message,
            [msg, chatbot],
            [msg, chatbot]
        ).then(
            bot_response,
            [chatbot, temperature, max_tokens, top_p, repeat_penalty],
            chatbot
        )

        send_btn.click(
            user_message,
            [msg, chatbot],
            [msg, chatbot]
        ).then(
            bot_response,
            [chatbot, temperature, max_tokens, top_p, repeat_penalty],
            chatbot
        )

        clear_btn.click(app.clear_chat, outputs=chatbot)

        def handle_model_load(file_obj, ctx_size, gpu_layer_count):
            """Load the selected model; report to both status textboxes."""
            # BUG FIX: this branch used to return a single value although two
            # outputs (load_status, model_status) are wired up.
            if file_obj is None:
                return "❌ Please select a model file", "No model loaded"

            status = app.load_model(file_obj.name, ctx_size, gpu_layer_count)

            if app.current_model:
                model_status_text = f"✅ Loaded: {app.current_model}"
            else:
                model_status_text = "No model loaded"

            return status, model_status_text

        load_btn.click(
            handle_model_load,
            [model_file, context_size, gpu_layers],
            [load_status, model_status]
        )

        def handle_model_unload():
            """Unload the model and refresh both status textboxes."""
            # BUG FIX: app.unload_model returns a single string but two
            # outputs are declared; wrap it so both components get a value.
            status = app.unload_model()
            return status, "No model loaded"

        unload_btn.click(
            handle_model_unload,
            outputs=[load_status, model_status]
        )

        refresh_btn.click(
            lambda: get_available_models(),
            outputs=available_models
        )

        def update_model_info():
            """Expose the loaded model's metadata to the hidden JSON widget."""
            if app.current_model:
                try:
                    return app.model_manager.get_model_info()
                except Exception:  # was a bare except; keep best-effort behavior
                    return None
            return None

        demo.load(
            update_model_info,
            outputs=[model_info]
        )

        def update_directory_info(directory):
            """Summarise the GGUF files found directly in `directory`."""
            try:
                path = Path(directory)
                if path.exists():
                    return {
                        "exists": True,
                        "is_directory": path.is_dir(),
                        "file_count": len(list(path.glob("*.gguf"))),
                        "size_mb": sum(f.stat().st_size for f in path.glob("*.gguf")) / (1024 * 1024)
                    }
                else:
                    return {"exists": False}
            except Exception as e:
                return {"error": str(e)}

        update_dir_btn.click(
            update_directory_info,
            models_dir,
            dir_info
        )

    return demo
386
+
387
if __name__ == "__main__":
    # Ensure the default models directory exists before the UI references it.
    os.makedirs("./models", exist_ok=True)

    # Build the UI and serve it on all interfaces (HF Spaces convention).
    interface = create_interface()
    interface.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_api=True,
    )
models.py ADDED
@@ -0,0 +1,234 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pathlib import Path
3
+ from typing import Optional, Dict, Generator, List
4
+ import json
5
+ import logging
6
+
7
+ # Try to import llama-cpp-python
8
+ try:
9
+ from llama_cpp import Llama
10
+ LLAMA_AVAILABLE = True
11
+ except ImportError:
12
+ LLAMA_AVAILABLE = False
13
+ Llama = None
14
+ logging.warning("llama-cpp-python not installed. Install with: pip install llama-cpp-python")
15
+
16
class ModelManager:
    """Manages loading and inference of GGUF models via llama-cpp-python."""

    def __init__(self):
        # Live Llama instance, or None when no model is loaded.
        self.model = None
        self.model_path: Optional[str] = None
        self.context_size: int = 2048
        self.gpu_layers: int = 0

    def is_loaded(self) -> bool:
        """Check if a model is loaded."""
        return self.model is not None

    def load_model(
        self,
        model_path: str,
        context_size: int = 2048,
        gpu_layers: int = 0,
        n_ctx: Optional[int] = None,
        n_gpu_layers: Optional[int] = None,
        verbose: bool = True
    ) -> bool:
        """Load a GGUF model.

        `n_ctx` / `n_gpu_layers` are llama.cpp-style aliases that take
        precedence over `context_size` / `gpu_layers` when provided.
        Returns True on success, False otherwise.
        """
        if not LLAMA_AVAILABLE:
            logging.error("llama-cpp-python is not installed")
            return False

        try:
            # Unload existing model if any.
            if self.model:
                self.unload_model()

            # BUG FIX: `n_ctx or context_size` silently discarded an explicit
            # 0; compare against None so falsy values are honoured.
            self.context_size = context_size if n_ctx is None else n_ctx
            self.gpu_layers = gpu_layers if n_gpu_layers is None else n_gpu_layers
            self.model_path = model_path

            self.model = Llama(
                model_path=model_path,
                n_ctx=self.context_size,
                n_gpu_layers=self.gpu_layers,
                verbose=verbose,
                embedding=False,
                f16_kv=True,
                use_mmap=True,
                use_mlock=False,
                logits_all=False,
                vocab_only=False
            )

            logging.info(f"Model loaded successfully: {model_path}")
            return True

        except Exception as e:
            logging.error(f"Failed to load model: {str(e)}")
            self.model = None
            self.model_path = None
            return False

    def unload_model(self):
        """Unload the current model and drop the reference to it."""
        if self.model:
            del self.model
            self.model = None
            self.model_path = None
            logging.info("Model unloaded")

    def generate(
        self,
        prompt: str,
        temperature: float = 0.7,
        max_tokens: int = 512,
        top_p: float = 0.9,
        repeat_penalty: float = 1.1,
        stop: Optional[List[str]] = None,
        stream: bool = True
    ) -> Generator[str, None, None]:
        """Yield generated text chunks for `prompt`.

        With stream=True each chunk is yielded as it arrives; with
        stream=False the whole completion is yielded once.
        Raises ValueError when no model is loaded.
        """
        if not self.model:
            raise ValueError("No model loaded")

        try:
            if stream:
                for chunk in self.model(
                    prompt,
                    max_tokens=max_tokens,
                    temperature=temperature,
                    top_p=top_p,
                    repeat_penalty=repeat_penalty,
                    stop=stop or [],
                    stream=True
                ):
                    if chunk["choices"]:
                        yield chunk["choices"][0]["text"]
            else:
                output = self.model(
                    prompt,
                    max_tokens=max_tokens,
                    temperature=temperature,
                    top_p=top_p,
                    repeat_penalty=repeat_penalty,
                    stop=stop or [],
                    stream=False
                )
                yield output["choices"][0]["text"]

        except Exception as e:
            logging.error(f"Generation error: {str(e)}")
            raise

    def get_model_info(self) -> Optional[Dict]:
        """Return a dict describing the loaded model, or None if unloaded."""
        if not self.model:
            return None

        try:
            # Model metadata exposed by llama-cpp-python (may be absent).
            metadata = getattr(self.model, 'metadata', {})

            # BUG FIX: was a bare `except:`; only swallow ordinary failures
            # of this private-attribute probe.
            try:
                vocab_size = len(self.model._model.tokenizer().vocab())
            except Exception:
                vocab_size = None

            info = {
                "model_path": self.model_path,
                "context_size": self.context_size,
                "gpu_layers": self.gpu_layers,
                "vocab_size": vocab_size,
            }

            if metadata:
                # Promote commonly useful metadata fields to the top level.
                common_fields = [
                    "general.architecture",
                    "llama.vocab_size",
                    "llama.context_length",
                    "llama.embedding_length",
                    "llama.block_count",
                    "llama.feed_forward_length",
                    "llama.attention.head_count",
                    "llama.attention.head_count_kv",
                    "llama.rope.dimension_count",
                    "llama.attention.layer_norm_rms_epsilon",
                    "tokenizer.ggml.model",
                    "tokenizer.ggml.tokens",
                ]

                for field in common_fields:
                    if field in metadata:
                        info[field] = metadata[field]

                # Raw metadata for debugging (binary values excluded since
                # they are not JSON-serialisable).
                info["raw_metadata"] = {k: v for k, v in metadata.items()
                                        if not isinstance(v, (bytes, bytearray))}

            return info

        except Exception as e:
            logging.error(f"Error getting model info: {str(e)}")
            return {"error": str(e)}

    def tokenize(self, text: str) -> List[int]:
        """Tokenize `text`; return [] on tokenizer failure.

        Raises ValueError when no model is loaded.
        """
        if not self.model:
            raise ValueError("No model loaded")

        try:
            return self.model.tokenize(text.encode("utf-8"))
        except Exception as e:
            logging.error(f"Tokenization error: {str(e)}")
            return []

    def detokenize(self, tokens: List[int]) -> str:
        """Detokenize `tokens`; return "" on failure.

        Raises ValueError when no model is loaded.
        """
        if not self.model:
            raise ValueError("No model loaded")

        try:
            return self.model.detokenize(tokens).decode("utf-8")
        except Exception as e:
            logging.error(f"Detokenization error: {str(e)}")
            return ""
204
+
205
def check_model_compatibility(model_path: str) -> Dict:
    """Report basic compatibility facts about a model file.

    Returns a dict with `exists`, `readable`, `gguf`, `size_mb` and
    `error`; `error` stays None unless an unexpected failure occurred.
    """
    result = {
        "exists": False,
        "readable": False,
        "gguf": False,
        "size_mb": 0,
        "error": None
    }

    try:
        path = Path(model_path)
        result["exists"] = path.exists()

        if result["exists"]:
            result["size_mb"] = path.stat().st_size / (1024 * 1024)
            result["gguf"] = path.suffix.lower() == ".gguf"

            # A 4-byte read is enough to prove the file is openable.
            # BUG FIX: was a bare `except:`; catch Exception so that
            # KeyboardInterrupt and friends still propagate.
            try:
                with open(path, "rb") as f:
                    header = f.read(4)
                    result["readable"] = len(header) == 4
            except Exception:
                result["readable"] = False

    except Exception as e:
        result["error"] = str(e)

    return result
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
gradio>=4.0.0
llama-cpp-python
# NOTE: the entries below are Python standard-library modules and must NOT be
# pip-installed. In particular, the PyPI "pathlib" package is an obsolete
# backport that can break modern Python environments.
# pathlib
# typing
# json
# logging
# datetime
utils.py ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ from pathlib import Path
4
+ from typing import List, Dict, Any, Optional
5
+ import logging
6
+ from datetime import datetime
7
+
8
def get_available_models(directory: str = "./models") -> Dict[str, Any]:
    """List GGUF model files found directly inside `directory`.

    Returns a summary dict (directory, exists, model_count, models sorted
    by name), or {"error": ...} when the directory is missing or a scan
    failure occurs.
    """
    try:
        base = Path(directory)
        if not base.exists():
            return {"error": f"Directory {directory} does not exist"}

        entries = []
        for candidate in base.glob("*.gguf"):
            try:
                info = candidate.stat()
            except Exception as e:
                # Skip unreadable files but keep scanning the rest.
                logging.warning(f"Error reading {candidate}: {e}")
                continue
            entries.append({
                "name": candidate.name,
                "path": str(candidate),
                "size_mb": round(info.st_size / (1024 * 1024), 2),
                "modified": datetime.fromtimestamp(info.st_mtime).isoformat(),
            })

        return {
            "directory": directory,
            "exists": True,
            "model_count": len(entries),
            "models": sorted(entries, key=lambda entry: entry["name"]),
        }

    except Exception as e:
        return {"error": str(e)}
40
+
41
def format_chat_history(history: List[List[str]], system_prompt: str = "") -> str:
    """Render (role, content) pairs into the ChatML-style prompt format.

    The result always ends with an opening assistant tag so the model
    continues the conversation as the assistant.
    """
    parts = []

    if system_prompt:
        parts.append(f"<|system|>\n{system_prompt}\n<|end|>\n\n")

    for role, content in history:
        if role == "user":
            parts.append(f"<|user|>\n{content}\n<|end|>\n\n")
        elif role == "assistant":
            parts.append(f"<|assistant|>\n{content}\n<|end|>\n\n")

    parts.append("<|assistant|>\n")
    return "".join(parts)
57
+
58
def format_chat_history_messages(history: List[Dict[str, str]], system_prompt: str = "") -> str:
    """Render messages-format history dicts into the ChatML-style prompt.

    Each entry is a {"role": ..., "content": ...} dict; unknown roles are
    skipped. The result ends with an opening assistant tag.
    """
    parts = []

    if system_prompt:
        parts.append(f"<|system|>\n{system_prompt}\n<|end|>\n\n")

    for message in history:
        role = message.get("role", "")
        content = message.get("content", "")

        if role == "user":
            parts.append(f"<|user|>\n{content}\n<|end|>\n\n")
        elif role == "assistant":
            parts.append(f"<|assistant|>\n{content}\n<|end|>\n\n")

    parts.append("<|assistant|>\n")
    return "".join(parts)
76
+
77
def parse_model_info(metadata: Dict[str, Any]) -> Dict[str, Any]:
    """Parse GGUF metadata into a small human-readable summary dict.

    Missing fields stay "Unknown". For llama-architecture models a rough
    parameter count is estimated from layer count and embedding size.
    """
    parsed = {
        "architecture": "Unknown",
        "parameters": "Unknown",
        "context_length": "Unknown",
        "embedding_size": "Unknown",
        "layers": "Unknown",
        "heads": "Unknown"
    }

    # Map GGUF metadata keys onto the summary fields when present.
    field_map = {
        "general.architecture": "architecture",
        "llama.block_count": "layers",
        "llama.context_length": "context_length",
        "llama.embedding_length": "embedding_size",
        "llama.attention.head_count": "heads",
    }
    for meta_key, out_key in field_map.items():
        if meta_key in metadata:
            parsed[out_key] = metadata[meta_key]

    # Rough parameter-count estimate for LLaMA-style architectures.
    if parsed["architecture"] == "llama":
        try:
            layers = int(parsed["layers"]) if parsed["layers"] != "Unknown" else 0
            embed_size = int(parsed["embedding_size"]) if parsed["embedding_size"] != "Unknown" else 0

            if layers > 0 and embed_size > 0:
                params = layers * (12 * embed_size * embed_size + 13 * embed_size)
                if params > 1e9:
                    parsed["parameters"] = f"{params / 1e9:.1f}B"
                elif params > 1e6:
                    parsed["parameters"] = f"{params / 1e6:.1f}M"
                else:
                    parsed["parameters"] = str(params)
        # BUG FIX: was a bare `except:`; only swallow conversion failures so
        # unrelated errors (e.g. KeyboardInterrupt) are not hidden.
        except (TypeError, ValueError):
            pass

    return parsed
123
+
124
def save_chat_history(history: List[Dict[str, str]], filename: Optional[str] = None) -> str:
    """Save chat history to a JSON file.

    When `filename` is None a timestamped name is generated. Returns the
    filename written, or "" on failure.
    (BUG FIX: the annotation was `filename: str = None`, which contradicts
    the None default; it is now Optional[str].)
    """
    if filename is None:
        filename = f"chat_history_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"

    try:
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(history, f, indent=2, ensure_ascii=False)
        return filename
    except Exception as e:
        logging.error(f"Failed to save chat history: {e}")
        return ""
136
+
137
def load_chat_history(filename: str) -> List[Dict[str, str]]:
    """Load chat history from a JSON file; return [] if it cannot be read."""
    try:
        content = Path(filename).read_text(encoding='utf-8')
        return json.loads(content)
    except Exception as exc:
        logging.error(f"Failed to load chat history: {exc}")
        return []
145
+
146
def estimate_tokens(text: str) -> int:
    """Roughly estimate the token count of `text` (~4 characters/token)."""
    chars_per_token = 4
    return len(text) // chars_per_token
150
+
151
def validate_model_file(model_path: str) -> Dict[str, Any]:
    """Validate that `model_path` points to a readable GGUF file.

    Returns a report dict; `valid` is True only when the file exists, has
    a .gguf extension and its 4-byte header can be read.
    """
    report = {
        "valid": False,
        "exists": False,
        "readable": False,
        "size_mb": 0,
        "file_type": None,
        "error": None
    }

    try:
        candidate = Path(model_path)
        report["exists"] = candidate.exists()

        # Guard clauses: bail out early with a specific error message.
        if not report["exists"]:
            report["error"] = "File does not exist"
            return report

        report["size_mb"] = round(candidate.stat().st_size / (1024 * 1024), 2)
        report["file_type"] = candidate.suffix.lower()

        if report["file_type"] != ".gguf":
            report["error"] = "Not a GGUF file"
            return report

        # Reading the first 4 bytes proves the file is openable.
        try:
            with open(candidate, "rb") as handle:
                header = handle.read(4)
                report["readable"] = len(header) == 4
                report["valid"] = report["readable"]
        except Exception as exc:
            report["error"] = f"Cannot read file: {str(exc)}"

    except Exception as exc:
        report["error"] = str(exc)

    return report
190
+
191
def create_default_config() -> Dict[str, Any]:
    """Return the application's built-in default configuration."""
    return {
        "models_directory": "./models",
        "default_context_size": 2048,
        "default_gpu_layers": 0,
        "default_temperature": 0.7,
        "default_max_tokens": 512,
        "default_top_p": 0.9,
        "default_repeat_penalty": 1.1,
        "system_prompt": "You are a helpful assistant.",
        "chat_format": "chatml",
        "auto_save_chat": True,
        "theme": "soft"
    }

def load_config(config_path: str = "config.json") -> Dict[str, Any]:
    """Load configuration from `config_path`, falling back to defaults.

    Any read/parse failure is logged and the default configuration is
    returned instead.
    """
    try:
        if Path(config_path).exists():
            with open(config_path, 'r') as handle:
                return json.load(handle)
    except Exception as exc:
        logging.warning(f"Failed to load config: {exc}")

    return create_default_config()

def save_config(config: Dict[str, Any], config_path: str = "config.json"):
    """Write `config` to `config_path` as pretty-printed JSON (best-effort)."""
    try:
        with open(config_path, 'w') as handle:
            json.dump(config, handle, indent=2)
    except Exception as exc:
        logging.error(f"Failed to save config: {exc}")