Spaces:
The Space runs a small FastAPI server that downloads a GGUF build of SmolVLM-500M-Instruct from the Hub on first start and serves completions through llama-cpp-python, exposing a `/generate` endpoint for inference and a `/health` endpoint for status checks:

```python
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from llama_cpp import Llama
import os

# Define the FastAPI app
app = FastAPI()

# Path to the GGUF model file
MODEL_NAME = "SmolVLM-500M-Instruct-GGUF.Q4_K_M.gguf"
MODEL_PATH = f"./{MODEL_NAME}"

# Download the model from the Hub if it's not present
if not os.path.exists(MODEL_PATH):
    from huggingface_hub import hf_hub_download
    hf_hub_download(
        repo_id="ggml-org/SmolVLM-500M-Instruct-GGUF",
        filename=MODEL_NAME,
        local_dir=".",
        local_dir_use_symlinks=False,
    )

# Load the Llama model; keep the app alive even if loading fails,
# so the health endpoint can still report the failure
try:
    llm = Llama(model_path=MODEL_PATH, n_ctx=2048, verbose=False)
except Exception as e:
    print(f"Error loading model: {e}")
    llm = None

class InferenceRequest(BaseModel):
    prompt: str

# POST endpoint that runs a completion against the loaded model
@app.post("/generate")
def generate_text(request: InferenceRequest):
    if llm is None:
        raise HTTPException(status_code=500, detail="Model not loaded")
    try:
        output = llm.create_completion(
            prompt=request.prompt,
            max_tokens=256,
            stop=["<|im_end|>", "</s>"],
            temperature=0.7,
        )
        return {"text": output["choices"][0]["text"].strip()}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

# Simple health check reporting whether the model loaded
@app.get("/health")
def health_check():
    return {"status": "ok", "model_loaded": llm is not None}
```