Spaces: Sleeping
import logging

from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from llama_cpp import Llama

from models import SQLContext
# Load the quantized SQL-generation model once at module import time so every
# request reuses the same in-memory weights.
llm = Llama(
    model_path="./quantized_model/sql_gpt_quantized.gguf",
    n_ctx=512,        # context window size in tokens
    n_threads=1,      # single CPU thread for inference
    n_gpu_layers=-1,  # -1 = offload all layers to GPU when available
)
app = FastAPI()

# Fully permissive CORS: any origin, method, and header may call this API.
# NOTE(review): "*" origins with allow_credentials=True is very open —
# consider pinning allow_origins to the actual frontend origin.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# NOTE(review): presumably registered as a route (e.g. @app.get("/")) — the
# decorator is not visible in this garbled chunk; confirm against the original.
def root():
    """Return a trivial hello payload (health-check style endpoint)."""
    return {"Hello": "World"}
# NOTE(review): presumably registered as a route (e.g. @app.post(...)) — the
# decorator is not visible in this garbled chunk; confirm against the original.
async def send_response(query: SQLContext):
    """Generate a SQL answer for *query* with the local llama.cpp model.

    Args:
        query: ``SQLContext`` carrying the schema ``context`` and the
            natural-language ``question``.

    Returns:
        ``{"response": <text>}`` — the model output truncated at the first
        ``###`` marker and stripped of surrounding whitespace and quotes.

    Raises:
        HTTPException: 500 when inference fails for any reason.
    """
    # TODO(review): the trailing '"' in the template looks like a leftover from
    # an earlier quoting scheme (the output is .strip('"')-ed below) — confirm
    # the model was fine-tuned with it before removing.
    query_text = f'''
### CONTEXT:\n{query.context}\n\n### QUESTION:{query.question}\n\n### [RESPONSE]:\n"
'''
    try:
        output = llm(
            prompt=query_text,
            max_tokens=512,
        )
        # Keep only the text before the next "###" section marker.
        response_text = output["choices"][0]["text"].split('###')[0].strip().strip('"')
        return {"response": response_text}
    except Exception as e:
        # logging.exception records the full traceback; %s-args defer formatting.
        logging.exception("Error generating response: %s", e)
        # Chain the cause so the original error survives in debug output.
        raise HTTPException(
            status_code=500,
            detail="Internal server error while processing the query.",
        ) from e