# Hugging Face Space: Llama-3 GGUF chat backend.
# NOTE(review): the Space page reported "Build error" — likely caused by the
# deprecated `local_dir_use_symlinks` argument; see get_model() below.
import os

from huggingface_hub import hf_hub_download
from llama_cpp import Llama
| # Configuration | |
| REPO_ID = "MaziyarPanahi/Llama-3-8B-Instruct-v0.3-GGUF" | |
| FILENAME = "Llama-3-8B-Instruct-v0.3.Q4_K_M.gguf" | |
| MODEL_PATH = os.path.join(os.path.dirname(__file__), "models", FILENAME) | |
| def get_model(): | |
| """ | |
| Downloads the model if not present, then loads it into memory. | |
| Returns a Llama instance. | |
| """ | |
| if not os.path.exists(MODEL_PATH): | |
| print(f"⬇️ Model not found. Downloading {FILENAME} from Hugging Face...") | |
| os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True) | |
| hf_hub_download( | |
| repo_id=REPO_ID, | |
| filename=FILENAME, | |
| local_dir=os.path.dirname(MODEL_PATH), | |
| local_dir_use_symlinks=False | |
| ) | |
| print("✅ Download complete.") | |
| else: | |
| print(f"✅ Model found at {MODEL_PATH}") | |
| print("🚀 Loading Llama-3 into memory (CPU Mode)...") | |
| # Initialize Llama (Free Tier: 2 vCPU, 16GB RAM) | |
| # n_ctx=2048 (Context window) | |
| llm = Llama( | |
| model_path=MODEL_PATH, | |
| n_ctx=2048, | |
| n_threads=2, # Optimizing for HF Spaces Free Tier | |
| verbose=False | |
| ) | |
| return llm | |
| # Global instance for re-use | |
| _llm_instance = None | |
| def generate_response(prompt: str, system_prompt: str = "") -> str: | |
| global _llm_instance | |
| if _llm_instance is None: | |
| _llm_instance = get_model() | |
| full_prompt = f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_prompt}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n" | |
| output = _llm_instance( | |
| full_prompt, | |
| max_tokens=512, | |
| stop=["<|eot_id|>"], | |
| echo=False | |
| ) | |
| return output['choices'][0]['text'] | |