File size: 22,568 Bytes
7edec80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
"""
HuggingFace Inference Client
Design System Extractor v2

Handles all LLM inference calls using HuggingFace Inference API.
Supports diverse models from different providers for specialized tasks.
"""

import os
from typing import Optional, AsyncGenerator
from dataclasses import dataclass
from huggingface_hub import InferenceClient, AsyncInferenceClient

from config.settings import get_settings


@dataclass
class ModelInfo:
    """Descriptive metadata for one model entry in the registry.

    A plain record with no validation or behaviour of its own. The field
    order below is also the positional argument order of the constructor
    that ``@dataclass`` generates.
    """
    # Model identifier; registry entries in this file use the same string
    # as their dictionary key.
    model_id: str
    # Vendor label used for grouping, e.g. "Meta" or "Mistral".
    provider: str
    # Context window size as an integer; presumably counted in tokens —
    # TODO confirm.
    context_length: int
    # Short free-text phrases describing what the model is good at.
    strengths: list[str]
    # Free-text note naming the agent or task the model is suggested for.
    best_for: str
    # Access tier label used for filtering.
    tier: str  # One of "free", "pro", "pro+"


# =============================================================================
# COMPREHENSIVE MODEL REGISTRY — Organized by Provider
# =============================================================================

# Registry of known models, keyed by model id. Every key equals the
# ``model_id`` of its entry. Entries are grouped by provider; ``best_for`` is
# free text shown to humans, so the em dashes in it are real U+2014
# characters (they had been corrupted into the three-character sequence
# produced by decoding UTF-8 through a single-byte code page).
AVAILABLE_MODELS: dict[str, ModelInfo] = {
    # =========================================================================
    # META — Llama Family (Best for reasoning)
    # =========================================================================
    "meta-llama/Llama-3.1-405B-Instruct": ModelInfo(
        model_id="meta-llama/Llama-3.1-405B-Instruct",
        provider="Meta",
        context_length=128000,
        strengths=["Best reasoning", "Massive knowledge", "Complex analysis"],
        best_for="Agent 3 (Advisor) — PREMIUM CHOICE",
        tier="pro+",
    ),
    "meta-llama/Llama-3.1-70B-Instruct": ModelInfo(
        model_id="meta-llama/Llama-3.1-70B-Instruct",
        provider="Meta",
        context_length=128000,
        strengths=["Excellent reasoning", "Long context", "Design knowledge"],
        best_for="Agent 3 (Advisor) — RECOMMENDED",
        tier="pro",
    ),
    "meta-llama/Llama-3.1-8B-Instruct": ModelInfo(
        model_id="meta-llama/Llama-3.1-8B-Instruct",
        provider="Meta",
        context_length=128000,
        strengths=["Fast", "Good reasoning for size", "Long context"],
        best_for="Budget Agent 3 fallback",
        tier="free",
    ),

    # =========================================================================
    # MISTRAL — European Excellence
    # =========================================================================
    "mistralai/Mixtral-8x22B-Instruct-v0.1": ModelInfo(
        model_id="mistralai/Mixtral-8x22B-Instruct-v0.1",
        provider="Mistral",
        context_length=65536,
        strengths=["Large MoE", "Strong reasoning", "Efficient"],
        best_for="Agent 3 (Advisor) — Pro alternative",
        tier="pro",
    ),
    "mistralai/Mixtral-8x7B-Instruct-v0.1": ModelInfo(
        model_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
        provider="Mistral",
        context_length=32768,
        strengths=["Good MoE efficiency", "Solid reasoning"],
        best_for="Agent 3 (Advisor) — Free tier option",
        tier="free",
    ),
    "mistralai/Mistral-7B-Instruct-v0.3": ModelInfo(
        model_id="mistralai/Mistral-7B-Instruct-v0.3",
        provider="Mistral",
        context_length=32768,
        strengths=["Fast", "Good instruction following"],
        best_for="General fallback",
        tier="free",
    ),
    "mistralai/Codestral-22B-v0.1": ModelInfo(
        model_id="mistralai/Codestral-22B-v0.1",
        provider="Mistral",
        context_length=32768,
        strengths=["Code specialist", "JSON generation", "Structured output"],
        best_for="Agent 4 (Generator) — RECOMMENDED",
        tier="pro",
    ),

    # =========================================================================
    # COHERE — Command R Family (Analysis & Retrieval)
    # =========================================================================
    "CohereForAI/c4ai-command-r-plus": ModelInfo(
        model_id="CohereForAI/c4ai-command-r-plus",
        provider="Cohere",
        context_length=128000,
        strengths=["Excellent analysis", "RAG optimized", "Long context"],
        best_for="Agent 3 (Advisor) — Great for research tasks",
        tier="pro",
    ),
    "CohereForAI/c4ai-command-r-v01": ModelInfo(
        model_id="CohereForAI/c4ai-command-r-v01",
        provider="Cohere",
        context_length=128000,
        strengths=["Good analysis", "Efficient"],
        best_for="Agent 3 budget option",
        tier="free",
    ),

    # =========================================================================
    # GOOGLE — Gemma Family
    # =========================================================================
    "google/gemma-2-27b-it": ModelInfo(
        model_id="google/gemma-2-27b-it",
        provider="Google",
        context_length=8192,
        strengths=["Strong instruction following", "Good balance"],
        best_for="Agent 2 (Normalizer) — Quality option",
        tier="pro",
    ),
    "google/gemma-2-9b-it": ModelInfo(
        model_id="google/gemma-2-9b-it",
        provider="Google",
        context_length=8192,
        strengths=["Fast", "Good instruction following"],
        best_for="Agent 2 (Normalizer) — Balanced",
        tier="free",
    ),

    # =========================================================================
    # MICROSOFT — Phi Family (Small but Mighty)
    # =========================================================================
    "microsoft/Phi-3.5-mini-instruct": ModelInfo(
        model_id="microsoft/Phi-3.5-mini-instruct",
        provider="Microsoft",
        context_length=128000,
        strengths=["Very fast", "Great structured output", "Long context"],
        best_for="Agent 2 (Normalizer) — RECOMMENDED",
        tier="free",
    ),
    "microsoft/Phi-3-medium-4k-instruct": ModelInfo(
        model_id="microsoft/Phi-3-medium-4k-instruct",
        provider="Microsoft",
        context_length=4096,
        strengths=["Fast", "Good for simple tasks"],
        best_for="Simple naming tasks",
        tier="free",
    ),

    # =========================================================================
    # QWEN — Alibaba Family
    # =========================================================================
    "Qwen/Qwen2.5-72B-Instruct": ModelInfo(
        model_id="Qwen/Qwen2.5-72B-Instruct",
        provider="Alibaba",
        context_length=32768,
        strengths=["Strong reasoning", "Multilingual", "Good design knowledge"],
        best_for="Agent 3 (Advisor) — Alternative",
        tier="pro",
    ),
    "Qwen/Qwen2.5-32B-Instruct": ModelInfo(
        model_id="Qwen/Qwen2.5-32B-Instruct",
        provider="Alibaba",
        context_length=32768,
        strengths=["Good balance", "Multilingual"],
        best_for="Medium-tier option",
        tier="pro",
    ),
    "Qwen/Qwen2.5-Coder-32B-Instruct": ModelInfo(
        model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
        provider="Alibaba",
        context_length=32768,
        strengths=["Code specialist", "JSON/structured output"],
        best_for="Agent 4 (Generator) — Alternative",
        tier="pro",
    ),
    "Qwen/Qwen2.5-7B-Instruct": ModelInfo(
        model_id="Qwen/Qwen2.5-7B-Instruct",
        provider="Alibaba",
        context_length=32768,
        strengths=["Fast", "Good all-rounder"],
        best_for="General fallback",
        tier="free",
    ),

    # =========================================================================
    # DEEPSEEK — Code Specialists
    # =========================================================================
    "deepseek-ai/deepseek-coder-33b-instruct": ModelInfo(
        model_id="deepseek-ai/deepseek-coder-33b-instruct",
        provider="DeepSeek",
        context_length=16384,
        strengths=["Excellent code generation", "JSON specialist"],
        best_for="Agent 4 (Generator) — Code focused",
        tier="pro",
    ),
    "deepseek-ai/DeepSeek-V2.5": ModelInfo(
        model_id="deepseek-ai/DeepSeek-V2.5",
        provider="DeepSeek",
        context_length=32768,
        strengths=["Strong reasoning", "Good code"],
        best_for="Multi-purpose",
        tier="pro",
    ),

    # =========================================================================
    # BIGCODE — StarCoder Family
    # =========================================================================
    "bigcode/starcoder2-15b-instruct-v0.1": ModelInfo(
        model_id="bigcode/starcoder2-15b-instruct-v0.1",
        provider="BigCode",
        context_length=16384,
        strengths=["Code generation", "Multiple languages"],
        best_for="Agent 4 (Generator) — Open source code model",
        tier="free",
    ),
}


# =============================================================================
# RECOMMENDED CONFIGURATIONS BY TIER
# =============================================================================

def _preset(*, name, description, agent2, agent3, agent4, fallback):
    """Build one preset record with its keys in a fixed, stable order."""
    return {
        "name": name,
        "description": description,
        "agent2": agent2,
        "agent3": agent3,
        "agent4": agent4,
        "fallback": fallback,
    }


# Named bundles of model ids: one model per LLM-backed agent slot
# ("agent2", "agent3", "agent4") plus a "fallback" model id.
MODEL_PRESETS = {
    "budget": _preset(
        name="Budget (Free Tier)",
        description="Best free models for each task",
        agent2="microsoft/Phi-3.5-mini-instruct",
        agent3="mistralai/Mixtral-8x7B-Instruct-v0.1",
        agent4="bigcode/starcoder2-15b-instruct-v0.1",
        fallback="mistralai/Mistral-7B-Instruct-v0.3",
    ),
    "balanced": _preset(
        name="Balanced (Pro Tier)",
        description="Good quality/cost balance",
        agent2="google/gemma-2-9b-it",
        agent3="meta-llama/Llama-3.1-70B-Instruct",
        agent4="mistralai/Codestral-22B-v0.1",
        fallback="Qwen/Qwen2.5-7B-Instruct",
    ),
    "quality": _preset(
        name="Maximum Quality (Pro+)",
        description="Best models regardless of cost",
        agent2="google/gemma-2-27b-it",
        agent3="meta-llama/Llama-3.1-405B-Instruct",
        agent4="deepseek-ai/deepseek-coder-33b-instruct",
        fallback="meta-llama/Llama-3.1-8B-Instruct",
    ),
    "diverse": _preset(
        name="Diverse Providers",
        description="One model from each major provider",
        agent2="microsoft/Phi-3.5-mini-instruct",    # Microsoft
        agent3="CohereForAI/c4ai-command-r-plus",    # Cohere
        agent4="mistralai/Codestral-22B-v0.1",       # Mistral
        fallback="meta-llama/Llama-3.1-8B-Instruct",  # Meta
    ),
}


# =============================================================================
# AGENT-SPECIFIC RECOMMENDATIONS
# =============================================================================

# Per-agent guidance. Agents with ``requires_llm: False`` carry only a note;
# LLM-backed agents list the task, what it needs from a model, an ordered
# list of ``(model_id, note)`` recommendations, and a sampling temperature.
# The notes are human-readable text, so their dashes are real em dashes
# (U+2014) rather than the mis-decoded three-character sequence they had
# been corrupted into.
AGENT_MODEL_RECOMMENDATIONS = {
    "crawler": {
        "requires_llm": False,
        "notes": "Pure rule-based extraction using Playwright + CSS parsing",
    },
    "extractor": {
        "requires_llm": False,
        "notes": "Pure rule-based extraction using Playwright + CSS parsing",
    },
    "normalizer": {
        "requires_llm": True,
        "task": "Token naming, duplicate detection, pattern inference",
        "needs": ["Fast inference", "Good instruction following", "Structured output"],
        "recommended": [
            ("microsoft/Phi-3.5-mini-instruct", "BEST — Fast, great structured output"),
            ("google/gemma-2-9b-it", "Good balance of speed and quality"),
            ("Qwen/Qwen2.5-7B-Instruct", "Reliable all-rounder"),
        ],
        "temperature": 0.2,
    },
    "advisor": {
        "requires_llm": True,
        "task": "Design system analysis, best practice recommendations",
        "needs": ["Strong reasoning", "Design knowledge", "Creative suggestions"],
        "recommended": [
            ("meta-llama/Llama-3.1-70B-Instruct", "BEST — Excellent reasoning"),
            ("CohereForAI/c4ai-command-r-plus", "Great for analysis tasks"),
            ("Qwen/Qwen2.5-72B-Instruct", "Strong alternative"),
            ("mistralai/Mixtral-8x7B-Instruct-v0.1", "Best free option"),
        ],
        "temperature": 0.4,
    },
    "generator": {
        "requires_llm": True,
        "task": "Generate JSON tokens, CSS variables, structured output",
        "needs": ["Code generation", "JSON formatting", "Schema adherence"],
        "recommended": [
            ("mistralai/Codestral-22B-v0.1", "BEST — Mistral's code model"),
            ("deepseek-ai/deepseek-coder-33b-instruct", "Excellent code specialist"),
            ("Qwen/Qwen2.5-Coder-32B-Instruct", "Strong code model"),
            ("bigcode/starcoder2-15b-instruct-v0.1", "Best free option"),
        ],
        "temperature": 0.1,
    },
}


# =============================================================================
# INFERENCE CLIENT
# =============================================================================

class HFInferenceClient:
    """
    Wrapper around the HuggingFace Inference API.

    Resolves the model and sampling temperature for a named agent, builds the
    chat message list, and issues chat-completion requests.  ``complete`` and
    ``complete_async`` retry once against the configured fallback model when
    the primary request raises; ``stream_async`` has no fallback.
    """

    # Appended to the system prompt when the caller asks for JSON-only output.
    _JSON_INSTRUCTION = (
        "You must respond with valid JSON only. "
        "No markdown, no explanation, just JSON."
    )

    # Per-agent sampling temperatures; agents not listed use the default.
    _AGENT_TEMPERATURES = {
        "normalizer": 0.2,  # Consistent naming
        "advisor": 0.4,     # Creative recommendations
        "generator": 0.1,   # Precise formatting
    }
    _DEFAULT_TEMPERATURE = 0.3

    def __init__(self):
        """Create the sync and async clients.

        Raises:
            ValueError: If no token is found in ``HF_TOKEN`` or in settings.
        """
        self.settings = get_settings()
        # Read token fresh from env — the Settings singleton may have been
        # created before the user entered their token via the Gradio UI.
        self.token = os.getenv("HF_TOKEN", "") or self.settings.hf.hf_token

        if not self.token:
            raise ValueError("HF_TOKEN is required for inference")

        # Let huggingface_hub route to the best available provider automatically.
        # Do NOT set base_url (overrides per-model routing) or
        # provider="hf-inference" (that provider no longer hosts most models).
        # The default provider="auto" picks the first available third-party
        # provider (novita, together, cerebras, etc.) for each model.
        self.sync_client = InferenceClient(token=self.token)
        self.async_client = AsyncInferenceClient(token=self.token)

    def get_model_for_agent(self, agent_name: str) -> str:
        """Return the model id the settings object assigns to ``agent_name``."""
        return self.settings.get_model_for_agent(agent_name)

    def get_temperature_for_agent(self, agent_name: str) -> float:
        """Return the recommended temperature for an agent (0.3 if unknown)."""
        return self._AGENT_TEMPERATURES.get(agent_name, self._DEFAULT_TEMPERATURE)

    def _build_messages(
        self,
        system_prompt: str,
        user_message: str,
        examples: Optional[list[dict]] = None,
    ) -> list[dict]:
        """Build the chat message list.

        Order: optional system message, then each few-shot example as a
        user/assistant pair (read from its ``"user"`` and ``"assistant"``
        keys), then the real user message.
        """
        messages = []

        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})

        for example in examples or ():
            messages.append({"role": "user", "content": example["user"]})
            messages.append({"role": "assistant", "content": example["assistant"]})

        messages.append({"role": "user", "content": user_message})
        return messages

    def _prepare_request(
        self,
        agent_name: str,
        system_prompt: str,
        user_message: str,
        examples: Optional[list[dict]],
        max_tokens: Optional[int],
        temperature: Optional[float],
        json_mode: bool,
    ) -> dict:
        """Resolve defaults and return the keyword arguments for ``chat_completion``."""
        # A missing (or zero) token budget falls back to the configured default.
        max_tokens = max_tokens or self.settings.hf.max_new_tokens
        # Compare against None explicitly: 0.0 is a valid temperature and
        # must not be replaced by the agent default.
        if temperature is None:
            temperature = self.get_temperature_for_agent(agent_name)

        if json_mode:
            # With no system prompt, use the instruction on its own rather
            # than prefixing it with an empty (or "None") first paragraph.
            if system_prompt:
                system_prompt = f"{system_prompt}\n\n{self._JSON_INSTRUCTION}"
            else:
                system_prompt = self._JSON_INSTRUCTION

        return {
            "model": self.get_model_for_agent(agent_name),
            "messages": self._build_messages(system_prompt, user_message, examples),
            "max_tokens": max_tokens,
            "temperature": temperature,
        }

    def _fallback_model_for(self, model: str) -> Optional[str]:
        """Return the configured fallback model, or None if unset or equal to ``model``."""
        fallback = self.settings.models.fallback_model
        if fallback and fallback != model:
            return fallback
        return None

    def complete(
        self,
        agent_name: str,
        system_prompt: str,
        user_message: str,
        examples: Optional[list[dict]] = None,
        max_tokens: Optional[int] = None,
        temperature: Optional[float] = None,
        json_mode: bool = False,
    ) -> str:
        """
        Synchronous completion.

        Args:
            agent_name: Which agent is making the call (for model selection)
            system_prompt: System instructions
            user_message: User input
            examples: Optional few-shot examples
            max_tokens: Max tokens to generate (settings default if not given)
            temperature: Sampling temperature (agent default if None; an
                explicit 0.0 is honoured)
            json_mode: If True, instruct model to output JSON

        Returns:
            Generated text

        Raises:
            Exception: Whatever the primary request raised when no distinct
                fallback model is configured, otherwise whatever the
                fallback request raised.
        """
        request = self._prepare_request(
            agent_name, system_prompt, user_message,
            examples, max_tokens, temperature, json_mode,
        )
        model = request["model"]

        try:
            response = self.sync_client.chat_completion(**request)
            return response.choices[0].message.content
        except Exception as e:
            print(f"[HF] Primary model {model} failed: {str(e)[:120]}")
            fallback = self._fallback_model_for(model)
            if fallback is None:
                raise
            print(f"[HF] Trying fallback: {fallback}")
            try:
                response = self.sync_client.chat_completion(
                    **{**request, "model": fallback}
                )
                return response.choices[0].message.content
            except Exception as fallback_err:
                print(f"[HF] Fallback {fallback} also failed: {str(fallback_err)[:120]}")
                raise

    async def complete_async(
        self,
        agent_name: str,
        system_prompt: str,
        user_message: str,
        examples: Optional[list[dict]] = None,
        max_tokens: Optional[int] = None,
        temperature: Optional[float] = None,
        json_mode: bool = False,
    ) -> str:
        """
        Asynchronous completion.

        Same parameters, return value, and fallback behaviour as complete().
        """
        request = self._prepare_request(
            agent_name, system_prompt, user_message,
            examples, max_tokens, temperature, json_mode,
        )
        model = request["model"]

        try:
            response = await self.async_client.chat_completion(**request)
            return response.choices[0].message.content
        except Exception as e:
            print(f"[HF] Primary model {model} failed: {str(e)[:120]}")
            fallback = self._fallback_model_for(model)
            if fallback is None:
                raise
            print(f"[HF] Trying fallback: {fallback}")
            try:
                response = await self.async_client.chat_completion(
                    **{**request, "model": fallback}
                )
                return response.choices[0].message.content
            except Exception as fallback_err:
                print(f"[HF] Fallback {fallback} also failed: {str(fallback_err)[:120]}")
                raise

    async def stream_async(
        self,
        agent_name: str,
        system_prompt: str,
        user_message: str,
        max_tokens: Optional[int] = None,
        temperature: Optional[float] = None,
    ) -> AsyncGenerator[str, None]:
        """
        Async streaming completion.

        Yields non-empty text fragments as they are generated. Unlike
        complete(), no fallback model is tried and no few-shot examples or
        JSON instruction are applied.
        """
        request = self._prepare_request(
            agent_name, system_prompt, user_message,
            None, max_tokens, temperature, False,
        )

        stream = await self.async_client.chat_completion(**request, stream=True)
        async for chunk in stream:
            # NOTE(review): skips chunks that carry no choices instead of
            # raising IndexError; some backends are reported to send such
            # chunks (e.g. usage-only) — confirm against the providers in use.
            if not chunk.choices:
                continue
            piece = chunk.choices[0].delta.content
            if piece:
                yield piece


# =============================================================================
# SINGLETON & CONVENIENCE FUNCTIONS
# =============================================================================

_client: Optional[HFInferenceClient] = None


def get_inference_client() -> HFInferenceClient:
    """Return the shared inference client, building it on first use.

    The cached client is replaced when the ``HF_TOKEN`` environment variable
    holds a non-empty value that differs from the token the cached client
    was built with (e.g. the user entered a token in the Gradio UI after
    startup). An empty or unset ``HF_TOKEN`` never triggers a rebuild.
    """
    global _client
    env_token = os.getenv("HF_TOKEN", "")

    if _client is None:
        stale = True
    else:
        # Only a real (non-empty) token that differs counts as a change.
        stale = bool(env_token) and _client.token != env_token

    if stale:
        _client = HFInferenceClient()
    return _client


def complete(
    agent_name: str,
    system_prompt: str,
    user_message: str,
    **kwargs
) -> str:
    """Run a synchronous completion on the shared client.

    Extra keyword arguments are forwarded unchanged to
    ``HFInferenceClient.complete``.
    """
    return get_inference_client().complete(
        agent_name,
        system_prompt,
        user_message,
        **kwargs,
    )


async def complete_async(
    agent_name: str,
    system_prompt: str,
    user_message: str,
    **kwargs
) -> str:
    """Run an asynchronous completion on the shared client.

    Extra keyword arguments are forwarded unchanged to
    ``HFInferenceClient.complete_async``.
    """
    shared = get_inference_client()
    pending = shared.complete_async(agent_name, system_prompt, user_message, **kwargs)
    return await pending


def get_model_info(model_id: str) -> dict:
    """Return the registry metadata for ``model_id`` as a plain dict.

    Models that are not in ``AVAILABLE_MODELS`` yield a two-key dict holding
    the given id and the provider ``"unknown"``.
    """
    entry = AVAILABLE_MODELS.get(model_id)
    if entry is None:
        return {"model_id": model_id, "provider": "unknown"}

    # Exported attributes, in the order they appear in the result.
    exported = (
        "model_id",
        "provider",
        "context_length",
        "strengths",
        "best_for",
        "tier",
    )
    return {attr: getattr(entry, attr) for attr in exported}


def get_models_by_provider() -> dict[str, list[str]]:
    """Group every registered model id under its provider name.

    Providers and the ids within each provider keep registry order.
    """
    grouped: dict[str, list[str]] = {}
    for model_id, info in AVAILABLE_MODELS.items():
        grouped.setdefault(info.provider, []).append(model_id)
    return grouped


def get_models_by_tier(tier: str) -> list[str]:
    """Return the ids of all registered models whose tier equals ``tier``.

    Tiers used by the registry are "free", "pro", and "pro+"; any other
    value yields an empty list.
    """
    matching = []
    for model_id, info in AVAILABLE_MODELS.items():
        if info.tier != tier:
            continue
        matching.append(model_id)
    return matching


def get_preset_config(preset_name: str) -> dict:
    """Return the named preset, or the "balanced" preset if the name is unknown."""
    chosen = preset_name if preset_name in MODEL_PRESETS else "balanced"
    return MODEL_PRESETS[chosen]