from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from models import SQLContext
from llama_cpp import Llama
import logging
# Load the quantized text-to-SQL model once at import time so a single
# instance is shared by all request handlers (llama-cpp model loading is
# expensive and should not happen per-request).
llm = Llama(
    model_path="./quantized_model/sql_gpt_quantized.gguf",  # relative to the process working directory
    n_ctx=512,         # context window in tokens; must fit prompt + generated output
    n_threads=1,       # CPU inference threads -- NOTE(review): likely too low; confirm against host core count
    n_gpu_layers=-1    # -1 offloads all layers to GPU when llama-cpp is built with GPU support; no-op otherwise
)
# FastAPI application instance; the route handlers below register on it.
app = FastAPI()

# NOTE(review): allow_origins=["*"] together with allow_credentials=True is
# disallowed by the CORS spec (browsers will not send credentials to a
# wildcard origin; Starlette compensates by echoing the request Origin,
# which effectively trusts every site). Restrict allow_origins to the real
# frontend URL before production use.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Allow your frontend origin
    allow_credentials=True,
    allow_methods=["*"],  # Allow all methods
    allow_headers=["*"],  # Allow all headers
)
@app.get("/")
def root():
    """Health-check endpoint: returns a static greeting to confirm the API is up."""
    greeting = {"Hello": "World"}
    return greeting
@app.post("/query")
async def send_response(query: SQLContext):
    """Generate a SQL answer for *query* with the local llama-cpp model.

    Args:
        query: SQLContext carrying ``context`` (schema text) and ``question``.

    Returns:
        dict: ``{"response": <completion text up to the first '###' marker>}``.

    Raises:
        HTTPException: 500 when inference fails for any reason.
    """
    # Build the prompt in the fine-tuning format. The original triple-quoted
    # literal leaked its leading newline/indentation and a stray trailing '"'
    # into the prompt; explicit concatenation keeps the prompt exact.
    query_text = (
        f"### CONTEXT:\n{query.context}\n\n"
        f"### QUESTION:{query.question}\n\n"
        "### [RESPONSE]:\n"
    )
    try:
        # NOTE(review): llm() is a blocking CPU/GPU call inside an async
        # handler, so it stalls the event loop for the whole generation.
        # Consider a plain `def` handler (FastAPI runs those in a threadpool)
        # or run_in_executor; kept async here to preserve the interface.
        output = llm(
            prompt=query_text,
            max_tokens=512,
        )
        # Keep only the text before the next '###' section marker and trim
        # stray quotes the model sometimes wraps around the answer.
        response_text = output["choices"][0]["text"].split("###")[0].strip().strip('"')
        return {"response": response_text}
    except Exception as e:
        # logging.exception records the full traceback (logging.error did not).
        logging.exception("Error generating response: %s", e)
        raise HTTPException(status_code=500, detail="Internal server error while processing the query.") from e