debug
data/models/llama3-1-8b.py
CHANGED
@@ -24,18 +24,14 @@ pipeline = transformers.pipeline(
 @spaces.GPU(enable_queue=True)
 def chat_completion():
     data = request.json
-
-    user_input = data.get('messages', [])
-    max_tokens = data.get('max_tokens', 2048)
-    temperature = data.get('temperature', 0.7)
-    top_p = data.get('top_p', 0.95)
 
     try:
+        print(user_input)
         outputs = pipeline(
-            user_input,
-            max_new_tokens=max_tokens,
-            temperature=temperature,
-            top_p=top_p
+            user_input=data.get('messages', []),
+            max_new_tokens=data.get('max_tokens', 2048),
+            temperature=data.get('temperature', 0.7),
+            top_p=data.get('top_p', 0.95)
         )
         return jsonify({"status": "success", "output": outputs[0]["generated_text"][-1]})
     except Exception as e:
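For context, a minimal sketch of how a client could exercise this handler once it is mounted as a Flask route. The route path, host, and port below are assumptions (the diff only shows the handler body); the payload keys mirror what the handler reads from request.json.

import requests

# Hypothetical endpoint URL -- the route registration is not part of this diff.
URL = "http://localhost:7860/chat_completion"

payload = {
    "messages": [{"role": "user", "content": "Hello!"}],  # forwarded to the pipeline call
    "max_tokens": 256,       # mapped to max_new_tokens (handler default: 2048)
    "temperature": 0.7,      # sampling temperature (handler default: 0.7)
    "top_p": 0.95,           # nucleus sampling cutoff (handler default: 0.95)
}

resp = requests.post(URL, json=payload)
print(resp.json())  # {"status": "success", "output": ...} when the pipeline call succeeds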