# Hbdb / app.py
# Hjgugugjhuhjggg's picture
# Update app.py
# 7948e3d verified
import asyncio
import subprocess
from typing import Optional

from fastapi import FastAPI
from pydantic import BaseModel
# FastAPI application object; the endpoint in this module is registered on it.
app = FastAPI()
class GenerationRequest(BaseModel):
    """Request body accepted by the ``/generate/`` endpoint.

    Every field is forwarded to the ``llama_main`` executable as a
    ``--<field>=<value>`` command-line flag. Only ``prompt`` is required;
    the remaining fields fall back to the defaults declared here.
    """

    # Input text, forwarded as --prompt. Required.
    prompt: str

    # Forwarded as --seq_len.
    seq_len: Optional[int] = 128_000

    # Forwarded as --temperature.
    temperature: Optional[float] = 0.8

    # Forwarded as --cpu_threads.
    # NOTE(review): -1 presumably lets the runner choose the thread count —
    # confirm against the llama_main flag documentation.
    cpu_threads: Optional[int] = -1

    # Filesystem location of the exported model program (.pte file),
    # forwarded as --model_path.
    model_path: Optional[str] = "/content/llama-model/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8.pte"

    # Filesystem location of the tokenizer, forwarded as --tokenizer_path.
    tokenizer_path: Optional[str] = "/content/llama-model/tokenizer.model"
@app.post("/generate/")
async def generate_text(request: GenerationRequest):
    """Run the ``llama_main`` executable for one prompt and return its output.

    Each request field is passed to the executable as a ``--name=value``
    flag. The child process is executed in a worker thread so the event
    loop stays free to serve other requests while generation is running.

    Args:
        request: Generation parameters taken from the JSON request body.

    Returns:
        ``{"generated_text": <stdout>}`` when the process exits with code 0.
        Otherwise ``{"error": <message>}``, where the message is the
        process's stderr, or the exception text if the process could not be
        started or its output could not be decoded.
    """
    # NOTE(review): model_path and tokenizer_path come straight from the
    # request body, so any client can point the executable at arbitrary
    # files on this host. Consider restricting them if the API is exposed.
    flag_values = {
        "model_path": request.model_path,
        "tokenizer_path": request.tokenizer_path,
        "prompt": request.prompt,
        "temperature": request.temperature,
        "seq_len": request.seq_len,
        "cpu_threads": request.cpu_threads,
    }

    command = ["/content/executorch/cmake-out/examples/models/llama/llama_main"]
    # The Optional fields accept an explicit null. Formatting that would send
    # the literal text "None" as the flag value, so such flags are left out;
    # presumably the executable then applies its own default — confirm.
    # Every flag is a single argv element and no shell is involved, so the
    # prompt text cannot add extra arguments.
    command.extend(
        f"--{name}={value}"
        for name, value in flag_values.items()
        if value is not None
    )

    loop = asyncio.get_running_loop()
    try:
        # subprocess.run blocks until the child exits; calling it directly in
        # this coroutine would stall every other request for the duration.
        result = await loop.run_in_executor(
            None,
            lambda: subprocess.run(command, capture_output=True, text=True),
        )
    except (OSError, ValueError) as exc:
        # OSError: executable missing or not runnable. ValueError: an argument
        # that cannot be passed to the OS (e.g. an embedded NUL byte) or
        # output that cannot be decoded as text. Report these in the same
        # shape as a non-zero exit instead of an unhandled server error.
        return {"error": str(exc)}

    if result.returncode != 0:
        return {"error": result.stderr}
    return {"generated_text": result.stdout}