Update app.py
Browse files
app.py
CHANGED
|
@@ -13,9 +13,9 @@ class GenModel(BaseModel):
|
|
| 13 |
question: str
|
| 14 |
system: str = "You are a helpful medical AI assistant. Help as much as you can. Remember, response in English."
|
| 15 |
temperature: float = 0.8
|
| 16 |
-
seed: int = 101
|
| 17 |
-
mirostat_mode: int=2
|
| 18 |
-
mirostat_tau: float=4.0
|
| 19 |
mirostat_eta: float=1.1
|
| 20 |
|
| 21 |
llm_chat = llama_cpp.Llama.from_pretrained(
|
|
@@ -77,12 +77,11 @@ async def chat(gen:GenModel):
|
|
| 77 |
seed=gen.seed,
|
| 78 |
#stream=True
|
| 79 |
)
|
| 80 |
-
messages.append({"role": "user", "content": gen.question}
|
| 81 |
print(output)
|
| 82 |
-
|
| 83 |
et = time()
|
| 84 |
output["time"] = et - st
|
| 85 |
-
messages.append({'role': "assistant", "content": output['choices'][0]['message']})
|
| 86 |
print(messages)
|
| 87 |
return output
|
| 88 |
except Exception as e:
|
|
@@ -94,9 +93,9 @@ async def chat(gen:GenModel):
|
|
| 94 |
# Chat Completion API
|
| 95 |
@app.post("/generate")
|
| 96 |
async def generate(gen:GenModel):
|
| 97 |
-
gen.system = "You are an AI assistant."
|
| 98 |
gen.temperature = 0.5
|
| 99 |
-
gen.seed
|
| 100 |
try:
|
| 101 |
st = time()
|
| 102 |
output = llm_generate.create_chat_completion(
|
|
@@ -119,7 +118,7 @@ async def generate(gen:GenModel):
|
|
| 119 |
#print(chunk)
|
| 120 |
"""
|
| 121 |
et = time()
|
| 122 |
-
|
| 123 |
return output
|
| 124 |
except Exception as e:
|
| 125 |
logger.error(f"Error in /generate endpoint: {e}")
|
|
|
|
| 13 |
question: str
|
| 14 |
system: str = "You are a helpful medical AI assistant. Help as much as you can. Remember, response in English."
|
| 15 |
temperature: float = 0.8
|
| 16 |
+
seed: int = 101
|
| 17 |
+
mirostat_mode: int=2
|
| 18 |
+
mirostat_tau: float=4.0
|
| 19 |
mirostat_eta: float=1.1
|
| 20 |
|
| 21 |
llm_chat = llama_cpp.Llama.from_pretrained(
|
|
|
|
| 77 |
seed=gen.seed,
|
| 78 |
#stream=True
|
| 79 |
)
|
| 80 |
+
messages.append({"role": "user", "content": gen.question})
|
| 81 |
print(output)
|
|
|
|
| 82 |
et = time()
|
| 83 |
output["time"] = et - st
|
| 84 |
+
messages.append({'role': "assistant", "content": output['choices'][0]['message']['content']})
|
| 85 |
print(messages)
|
| 86 |
return output
|
| 87 |
except Exception as e:
|
|
|
|
| 93 |
# Chat Completion API
|
| 94 |
@app.post("/generate")
|
| 95 |
async def generate(gen:GenModel):
|
| 96 |
+
gen.system = "You are an helpful medical AI assistant."
|
| 97 |
gen.temperature = 0.5
|
| 98 |
+
gen.seed = 42
|
| 99 |
try:
|
| 100 |
st = time()
|
| 101 |
output = llm_generate.create_chat_completion(
|
|
|
|
| 118 |
#print(chunk)
|
| 119 |
"""
|
| 120 |
et = time()
|
| 121 |
+
output["time"] = et - st
|
| 122 |
return output
|
| 123 |
except Exception as e:
|
| 124 |
logger.error(f"Error in /generate endpoint: {e}")
|