Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -14,15 +14,22 @@ async def root():
|
|
| 14 |
@app.post("/hello/")
|
| 15 |
def say_hello(msg: Annotated[str, Form()]):
|
| 16 |
print("model")
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
)
|
| 23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
input_ids = tokenizer(msg, return_tensors="pt").to("cpu")
|
| 25 |
print("output")
|
| 26 |
-
|
|
|
|
|
|
|
| 27 |
print("complete")
|
| 28 |
return {"message": tokenizer.decode(outputs[0])}
|
|
|
|
@app.post("/hello/")
def say_hello(msg: Annotated[str, Form()]):
    """Generate a chat completion for *msg* using SmolLM-1.7B-Instruct on CPU.

    Form parameter:
        msg: the user's prompt text (multipart/form-data field).

    Returns:
        dict: ``{"message": <decoded model output>}``. Note the decoded text
        includes the chat-template/prompt tokens, since ``decode`` is applied
        to the full generated sequence ``outputs[0]``.
    """
    print("model")
    # Imported lazily inside the handler (as in the original) so importing
    # this module does not pull in transformers at startup.
    # NOTE(review): the tokenizer and model are re-loaded on EVERY request,
    # which is very slow — consider hoisting to module level or caching.
    from transformers import AutoModelForCausalLM, AutoTokenizer

    checkpoint = "HuggingFaceTB/SmolLM-1.7B-Instruct"
    device = "cpu"  # for GPU usage or "cpu" for CPU usage
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    # for multiple GPUs install accelerate and do
    # `model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map="auto")`
    model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)

    # BUG FIX: the original hard-coded the prompt content
    # ("things about elasticsearch") and silently ignored the submitted
    # form field, so every request produced an answer to the same question.
    # Use the caller's message instead.
    messages = [{"role": "user", "content": msg}]
    input_text = tokenizer.apply_chat_template(messages, tokenize=False)
    print(input_text)

    print("output")
    # (Removed dead local: the original also computed
    # `input_ids = tokenizer(msg, return_tensors="pt").to("cpu")`
    # but never used it.)
    inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
    outputs = model.generate(
        inputs,
        max_new_tokens=256,
        temperature=0.6,
        top_p=0.92,
        do_sample=True,
    )

    print("complete")
    return {"message": tokenizer.decode(outputs[0])}