Spaces:
The Space runs a small FastAPI server that downloads a GGUF build of SmolVLM-500M-Instruct from the Hub on first start and serves completions through llama-cpp-python, exposing a `/generate` endpoint for inference and a `/health` endpoint for status checks:

```python
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from llama_cpp import Llama
import os

# Define the FastAPI app
app = FastAPI()

# Path to the GGUF model file
MODEL_NAME = "SmolVLM-500M-Instruct-GGUF.Q4_K_M.gguf"
MODEL_PATH = f"./{MODEL_NAME}"

# Download the model from the Hub if it's not present
if not os.path.exists(MODEL_PATH):
    from huggingface_hub import hf_hub_download
    hf_hub_download(
        repo_id="ggml-org/SmolVLM-500M-Instruct-GGUF",
        filename=MODEL_NAME,
        local_dir=".",
        local_dir_use_symlinks=False,
    )

# Load the Llama model; keep the app alive even if loading fails,
# so the health endpoint can still report the failure
try:
    llm = Llama(model_path=MODEL_PATH, n_ctx=2048, verbose=False)
except Exception as e:
    print(f"Error loading model: {e}")
    llm = None

class InferenceRequest(BaseModel):
    prompt: str

# POST endpoint that runs a completion against the loaded model
@app.post("/generate")
def generate_text(request: InferenceRequest):
    if llm is None:
        raise HTTPException(status_code=500, detail="Model not loaded")
    try:
        output = llm.create_completion(
            prompt=request.prompt,
            max_tokens=256,
            stop=["<|im_end|>", "</s>"],
            temperature=0.7,
        )
        return {"text": output["choices"][0]["text"].strip()}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

# Simple health check reporting whether the model loaded
@app.get("/health")
def health_check():
    return {"status": "ok", "model_loaded": llm is not None}
```