Spaces:

moriire
/

OpenGenAI

Sleeping

moriire committed on Apr 7, 2024

Commit

aad9e06

verified ·

1 Parent(s): d861c90

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -13,9 +13,9 @@ class GenModel(BaseModel):
     question: str
     system: str = "You are a helpful medical AI assistant. Help as much as you can. Remember, response in English."
     temperature: float = 0.8
-    seed: int = 101,
-    mirostat_mode: int=2,
-    mirostat_tau: float=4.0,
     mirostat_eta: float=1.1
 llm_chat = llama_cpp.Llama.from_pretrained(
@@ -77,12 +77,11 @@ async def chat(gen:GenModel):
             seed=gen.seed,
             #stream=True
         )
-        messages.append({"role": "user", "content": gen.question},)
         print(output)
         et = time()
         output["time"] = et - st
-        messages.append({'role': "assistant", "content": output['choices'][0]['message']})
         print(messages)
         return output
     except Exception as e:
@@ -94,9 +93,9 @@ async def chat(gen:GenModel):
 # Chat Completion API
 @app.post("/generate")
 async def generate(gen:GenModel):
-    gen.system = "You are an AI assistant."
     gen.temperature = 0.5
-    gen.seed: int = 42
     try:
         st = time()
         output = llm_generate.create_chat_completion(
@@ -119,7 +118,7 @@ async def generate(gen:GenModel):
             #print(chunk)
         """
         et = time()
-        #output["time"] = et - st
         return output
     except Exception as e:
         logger.error(f"Error in /generate endpoint: {e}")

     question: str
     system: str = "You are a helpful medical AI assistant. Help as much as you can. Remember, response in English."
     temperature: float = 0.8
+    seed: int = 101
+    mirostat_mode: int=2
+    mirostat_tau: float=4.0
     mirostat_eta: float=1.1
 llm_chat = llama_cpp.Llama.from_pretrained(
             seed=gen.seed,
             #stream=True
         )
+        messages.append({"role": "user", "content": gen.question})
         print(output)
         et = time()
         output["time"] = et - st
+        messages.append({'role': "assistant", "content": output['choices'][0]['message']['content']})
         print(messages)
         return output
     except Exception as e:
 # Chat Completion API
 @app.post("/generate")
 async def generate(gen:GenModel):
+    gen.system = "You are an helpful medical AI assistant."
     gen.temperature = 0.5
+    gen.seed = 42
     try:
         st = time()
         output = llm_generate.create_chat_completion(
             #print(chunk)
         """
         et = time()
+        output["time"] = et - st
         return output
     except Exception as e:
         logger.error(f"Error in /generate endpoint: {e}")