Spaces:

krupakar-reddy
/

LLM-api-deployment-test

Runtime error

krupakar-reddy committed on May 15, 2024

Commit

6efe7b3

verified ·

1 Parent(s): fca2aca

Update main.py

Files changed (1) hide show

main.py CHANGED Viewed

@@ -5,9 +5,9 @@ from llama_cpp import Llama
 model_path = "mistral-7b-v0.1.Q4_K_M.gguf"
 llm = Llama(
-    model_path=model_path,
-    n_ctx=4096,
-    n_threads=4,
 )
 app = FastAPI()
@@ -16,22 +16,20 @@ class DSAPrompt(BaseModel):
     prompt: str
 @app.post("/solve_dsa_problem")
-async def solve_dsa_problem(request: Request, item: DSAPrompt):
     system_prompt = '''
                     You are a Data Structures and Algorithm problem solver. You are given the following problem and you need to solve it.
                     Give a detailed explanation of the solution approach and the code in C++, Java, or Python.
                     If the input is not a DSA problem, politely refuse their request and reinsist to provide a DSA problem.
                     '''
-    async with llm.create_chat_completion(
         messages=[
             {"role": "system", "content": system_prompt},
             {"role": "user", "content": item.prompt}
         ],
-        temperature=0.9,
-        max_tokens=2048,
-        stream=True
-    ) as stream_result:
-        async for chunk in stream_result:
-            yield chunk

 model_path = "mistral-7b-v0.1.Q4_K_M.gguf"
 llm = Llama(
+    model_path = model_path,
+    n_ctx = 4096,
+    n_threads = 4,
 )
 app = FastAPI()
     prompt: str
 @app.post("/solve_dsa_problem")
+async def solve_dsa_problem(item: DSAPrompt):
     system_prompt = '''
                     You are a Data Structures and Algorithm problem solver. You are given the following problem and you need to solve it.
                     Give a detailed explanation of the solution approach and the code in C++, Java, or Python.
                     If the input is not a DSA problem, politely refuse their request and reinsist to provide a DSA problem.
                     '''
+    res = llm.create_chat_completion(
         messages=[
             {"role": "system", "content": system_prompt},
             {"role": "user", "content": item.prompt}
         ],
+        temperature = 0.9,
+        max_tokens = 2048,
+    )
+    return res