ujalaarshad17 committed on
Commit
df4263a
·
1 Parent(s): 1e949b6

Add large file with Git LFS

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.gguf filter=lfs diff=lfs merge=lfs -text
main.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException
2
+ from models import SQLContext
3
+ from llama_cpp import Llama
4
+ import logging
5
+
6
# Load the quantized SQL-generation model once at import time so every
# request reuses the same in-memory instance instead of reloading it.
llm = Llama(
    # GGUF-quantized model file shipped with this repo (tracked via Git LFS).
    model_path="./quantized_model/sql_gpt_quantized.gguf",
    n_ctx=512,        # context window: prompt + completion token budget
    n_threads=2,      # CPU threads used for inference
    n_gpu_layers=-1   # -1 offloads all layers to GPU when one is available
)

# FastAPI application object the route decorators below attach to.
app = FastAPI()
14
+
15
+ @app.get("/")
16
+ def root():
17
+ return {"Hello": "World"}
18
+
19
+ @app.post("/query")
20
+ async def send_response(query: SQLContext):
21
+ query_text = f'''
22
+ ### CONTEXT:\n{query.context}\n\n### QUESTION:{query.question}\n\n### [RESPONSE]:\n"
23
+ '''
24
+ try:
25
+ output = llm(
26
+ prompt=query_text,
27
+ max_tokens=512,
28
+ )
29
+ response_text = output["choices"][0]["text"].split('###')[0].strip('"')
30
+ return {"response": response_text}
31
+ except Exception as e:
32
+ logging.error(f"Error generating response: {e}")
33
+ raise HTTPException(status_code=500, detail="Internal server error while processing the query.")
quantized_model/sql_gpt_quantized.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34010ed42c25cc6e59c3f0f2af2ea7c3000363a264efab975e96c0489288060c
3
+ size 1708582432