debug
Browse files
data/models/llama3-1-70b.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
| 1 |
from flask import Flask, request, jsonify
|
| 2 |
from huggingface_hub import login
|
| 3 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 4 |
-
from threading import Thread
|
| 5 |
import spaces
|
| 6 |
import torch
|
| 7 |
import os
|
|
@@ -35,8 +34,6 @@ def chat_completion():
|
|
| 35 |
try:
|
| 36 |
input_ids = tokenizer.apply_chat_template(user_input, tokenize=False, add_generation_prompt=True)
|
| 37 |
inputs = tokenizer(input_ids, return_tensors="pt").to(0)
|
| 38 |
-
|
| 39 |
-
streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
|
| 40 |
|
| 41 |
generate_kwargs = dict(
|
| 42 |
inputs,
|
|
|
|
| 1 |
from flask import Flask, request, jsonify
|
| 2 |
from huggingface_hub import login
|
| 3 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
|
|
| 4 |
import spaces
|
| 5 |
import torch
|
| 6 |
import os
|
|
|
|
| 34 |
try:
|
| 35 |
input_ids = tokenizer.apply_chat_template(user_input, tokenize=False, add_generation_prompt=True)
|
| 36 |
inputs = tokenizer(input_ids, return_tensors="pt").to(0)
|
|
|
|
|
|
|
| 37 |
|
| 38 |
generate_kwargs = dict(
|
| 39 |
inputs,
|