Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -61,10 +61,18 @@ qwen = HuggingFaceEndpoint(
|
|
| 61 |
do_sample=False,
|
| 62 |
)
|
| 63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
llm = prompt_qwen | qwen
|
| 65 |
|
| 66 |
llm2 = prompt_llama | llama
|
| 67 |
|
|
|
|
| 68 |
# llm = prompt | CustomLLM(repo_id="Qwen/Qwen-VL-Chat", model_type='text-generation', api_token=API_TOKEN, max_new_tokens=150).bind(stop=['<|im_end|>'])
|
| 69 |
|
| 70 |
|
|
@@ -82,6 +90,10 @@ async def conversation(data : ConversationPost):
|
|
| 82 |
async def conversation2(data : ConversationPost):
|
| 83 |
return {"output":llm2.invoke({"question":data.question})}
|
| 84 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
|
| 86 |
@app.post("/inference")
|
| 87 |
async def inference(data : InferencePost):
|
|
@@ -89,6 +101,8 @@ async def inference(data : InferencePost):
|
|
| 89 |
out = llm2.invoke(data.question)
|
| 90 |
elif data.with_template == 'qwen':
|
| 91 |
out = llm.invoke(data.question)
|
|
|
|
|
|
|
| 92 |
else:
|
| 93 |
out = llama.invoke(data.question)
|
| 94 |
|
|
|
|
| 61 |
do_sample=False,
|
| 62 |
)
|
| 63 |
|
| 64 |
+
qwen2 = HuggingFaceEndpoint(
|
| 65 |
+
repo_id="Qwen/Qwen2-1.5B-Instruct",
|
| 66 |
+
task="text-generation",
|
| 67 |
+
max_new_tokens=150,
|
| 68 |
+
do_sample=False,
|
| 69 |
+
)
|
| 70 |
+
|
| 71 |
llm = prompt_qwen | qwen
|
| 72 |
|
| 73 |
llm2 = prompt_llama | llama
|
| 74 |
|
| 75 |
+
llm3 = prompt_qwen | qwen2
|
| 76 |
# llm = prompt | CustomLLM(repo_id="Qwen/Qwen-VL-Chat", model_type='text-generation', api_token=API_TOKEN, max_new_tokens=150).bind(stop=['<|im_end|>'])
|
| 77 |
|
| 78 |
|
|
|
|
| 90 |
async def conversation2(data : ConversationPost):
|
| 91 |
return {"output":llm2.invoke({"question":data.question})}
|
| 92 |
|
| 93 |
+
@app.post("/conversation3")
|
| 94 |
+
async def conversation3(data : ConversationPost):
|
| 95 |
+
return {"output":llm3.invoke({"question":data.question})}
|
| 96 |
+
|
| 97 |
|
| 98 |
@app.post("/inference")
|
| 99 |
async def inference(data : InferencePost):
|
|
|
|
| 101 |
out = llm2.invoke(data.question)
|
| 102 |
elif data.with_template == 'qwen':
|
| 103 |
out = llm.invoke(data.question)
|
| 104 |
+
elif data.with_template == 'qwen2':
|
| 105 |
+
out = llm3.invoke(data.question)
|
| 106 |
else:
|
| 107 |
out = llama.invoke(data.question)
|
| 108 |
|