'''Defines available model configurations.

Maps three tiers ("tiny", "small", "medium") to their model filename, Hugging Face repo,
required GPU VRAM, and required system RAM.

get_system_specs() uses psutil to compute total system RAM in GB and torch.cuda to query
GPU VRAM in GB (zero if no CUDA device).

select_best_model() prints the detected RAM and GPU VRAM, chooses "small" if GPU VRAM >= 4 GB
or if RAM >= 8 GB and "tiny" otherwise, prints the chosen tier and model name, and returns
the model filename and repo string.

ensure_model() resolves a local cache directory (Hugging Face Space or local development),
downloads the GGUF file from the Hugging Face Hub if it is not already cached, and returns
the path to the model file.
'''
import os
from typing import Dict, Optional, Tuple

import psutil
import torch
import torchaudio.transforms as T
from huggingface_hub import hf_hub_download
from llama_index.llms.llama_cpp import LlamaCPP
# Model options mapped to their requirements
MODEL_OPTIONS = {
    "tiny": {
        "name": "TinyLlama-1.1B-Chat-v1.0.Q4_K_M.gguf",
        "repo": "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
        "vram_req": 2,  # GB
        "ram_req": 4,   # GB
    },
    "small": {
        "name": "phi-2.Q4_K_M.gguf",
        "repo": "TheBloke/phi-2-GGUF",
        "vram_req": 4,
        "ram_req": 8,
    },
    "medium": {
        "name": "mistral-7b-instruct-v0.1.Q4_K_M.gguf",
        "repo": "TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
        "vram_req": 6,
        "ram_req": 16,
    },
}
def get_system_specs() -> Dict[str, float]:
    """Get system specifications."""
    # Get RAM
    ram_gb = psutil.virtual_memory().total / (1024**3)

    # Get GPU info if available
    gpu_vram_gb = 0
    if torch.cuda.is_available():
        try:
            # Query GPU memory in bytes and convert to GB
            gpu_vram_gb = torch.cuda.get_device_properties(0).total_memory / (1024**3)
        except Exception as e:
            print(f"Warning: Could not get GPU memory: {e}")

    return {
        "ram_gb": ram_gb,
        "gpu_vram_gb": gpu_vram_gb
    }
def select_best_model() -> Tuple[str, str]:
    """Select the best model based on system specifications."""
    specs = get_system_specs()
    print("\nSystem specifications:")
    print(f"RAM: {specs['ram_gb']:.1f} GB")
    print(f"GPU VRAM: {specs['gpu_vram_gb']:.1f} GB")

    # Prefer the GPU when it has enough VRAM; otherwise fall back to system RAM
    if specs['gpu_vram_gb'] >= 4:
        model_tier = "small"  # quantized phi-2 fits comfortably in 4 GB of VRAM
    elif specs['ram_gb'] >= 8:
        model_tier = "small"
    else:
        model_tier = "tiny"

    selected = MODEL_OPTIONS[model_tier]
    print(f"\nSelected model tier: {model_tier}")
    print(f"Model: {selected['name']}")
    return selected['name'], selected['repo']
def ensure_model(model_name: Optional[str] = None, repo_id: Optional[str] = None) -> str:
    """Ensures model is available, downloading only if needed."""
    BASE_DIR = os.path.dirname(os.path.dirname(__file__))

    # Determine environment and set cache directory
    if os.path.exists("/home/user"):
        # HF Space environment
        cache_dir = "/home/user/.cache/models"
    else:
        # Local development environment
        cache_dir = os.path.join(BASE_DIR, "models")

    # Create cache directory if it doesn't exist
    try:
        os.makedirs(cache_dir, exist_ok=True)
    except Exception as e:
        print(f"Warning: Could not create cache directory {cache_dir}: {e}")
        # Fall back to a temporary directory if needed
        cache_dir = os.path.join("/tmp", "models")
        os.makedirs(cache_dir, exist_ok=True)

    # Get model details
    if not model_name or not repo_id:
        model_option = MODEL_OPTIONS["small"]  # default to small model
        model_name = model_option["name"]
        repo_id = model_option["repo"]

    # Ensure model_name and repo_id are not None
    if model_name is None:
        raise ValueError("model_name cannot be None")
    if repo_id is None:
        raise ValueError("repo_id cannot be None")

    # Check if model already exists in cache
    model_path = os.path.join(cache_dir, model_name)
    if os.path.exists(model_path):
        print(f"\nUsing cached model: {model_path}")
        return model_path

    print(f"\nDownloading model {model_name} from {repo_id}...")
    model_path = hf_hub_download(
        repo_id=repo_id,
        filename=model_name,
        cache_dir=cache_dir,
        local_dir=cache_dir
    )
    print(f"Model downloaded successfully to {model_path}")
    return model_path
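
# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative, not part of this module's original code):
# pick a model tier for the current hardware, make sure the GGUF file is cached
# locally, and load it with LlamaCPP. The LlamaCPP parameters below
# (temperature, max_new_tokens, context_window, n_gpu_layers) are assumed
# example values, not values taken from this module.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    name, repo = select_best_model()
    path = ensure_model(model_name=name, repo_id=repo)

    llm = LlamaCPP(
        model_path=path,
        temperature=0.1,
        max_new_tokens=256,
        context_window=2048,
        # Offload all layers to the GPU when CUDA is available, otherwise run on CPU
        model_kwargs={"n_gpu_layers": -1 if torch.cuda.is_available() else 0},
        verbose=True,
    )
    print(llm.complete("Hello! Briefly introduce yourself.").text)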