File size: 1,189 Bytes
df4263a
1eeb284
df4263a
 
 
 
 
 
 
eea8e3d
df4263a
 
 
 
1eeb284
 
 
 
 
 
 
df4263a
 
 
 
 
 
 
 
 
 
 
 
 
 
9466107
df4263a
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from models import SQLContext
from llama_cpp import Llama
import logging

# Load the quantized SQL-generation model once at import time.
# Module-level side effect: startup blocks while the GGUF file is read.
llm = Llama(
    model_path="./quantized_model/sql_gpt_quantized.gguf",  # NOTE(review): relative path — resolution depends on the process working directory; confirm deployment layout
    n_ctx=512,        # context window in tokens, shared by prompt + completion
    n_threads=1,      # single CPU thread for any non-offloaded work
    n_gpu_layers=-1   # presumably -1 = offload all layers to GPU (llama-cpp convention) — confirm
)

# FastAPI application with a fully permissive CORS policy (development setup).
app = FastAPI()
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # any origin may call the API; tighten to the real frontend origin for production
    allow_credentials=True,  # NOTE(review): wildcard origins + credentials is disallowed by the CORS spec — browsers will not send credentials; confirm intent
    allow_methods=["*"],  # all HTTP methods
    allow_headers=["*"],  # all request headers
)
@app.get("/")
def root():
    """Health-check endpoint returning a fixed greeting payload."""
    return dict(Hello="World")

@app.post("/query")
async def send_response(query: SQLContext):
    """Generate a SQL answer for the given context/question pair.

    Args:
        query: SQLContext with `context` (schema/context text) and
            `question` (natural-language question) fields.

    Returns:
        dict with a single "response" key holding the generated SQL text.

    Raises:
        HTTPException: 500 when inference fails for any reason.

    NOTE(review): `llm(...)` is a blocking, CPU/GPU-bound call inside an
    `async def` handler, so it stalls the event loop for the whole
    inference. Consider a plain `def` handler (FastAPI runs those in a
    threadpool) or `run_in_executor` — confirm before changing.
    """
    # Prompt template kept byte-identical to the trained format.
    # NOTE(review): the trailing `"` after [RESPONSE]: looks like a
    # copy-paste artifact — confirm against the fine-tuning template
    # before removing it, since the model may have been trained with it.
    query_text = f'''
    ### CONTEXT:\n{query.context}\n\n### QUESTION:{query.question}\n\n### [RESPONSE]:\n"
    '''
    try:
        output = llm(
            prompt=query_text,
            max_tokens=512,  # NOTE(review): n_ctx=512 total — a long prompt leaves few tokens for output; confirm sizing
        )
        # Keep only the text before the next '###' section marker and drop
        # surrounding whitespace/quotes the model may emit.
        response_text = output["choices"][0]["text"].split('###')[0].strip().strip('"')
        return {"response": response_text}
    except Exception as e:
        # logging.exception records the full traceback (logging.error with an
        # f-string did not); lazy %s args avoid eager formatting.
        logging.exception("Error generating response: %s", e)
        # Chain the cause so the original error survives on the HTTPException.
        raise HTTPException(
            status_code=500,
            detail="Internal server error while processing the query.",
        ) from e