# agent/data/models/llama3-1-8b.py
from flask import Flask, request, jsonify
import spaces
import transformers
import torch
import os
# Initialize Flask app
app = Flask(__name__)

# Hugging Face access token, read from the "ai" environment variable
api_key = os.getenv("ai")

model_id = "UCODE/agent-llama"

# Build the text-generation pipeline once at startup so the model is
# loaded a single time rather than on every request
pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device="cuda",
    token=api_key,
)
@app.route('/chat', methods=['POST'])
@spaces.GPU(enable_queue=True)
def chat_completion():
    # Tolerate missing or invalid JSON bodies instead of raising outside the try block
    data = request.get_json(silent=True) or {}
    try:
        print(data.get('messages', []))  # debug: log the incoming conversation
        outputs = pipeline(
            text_inputs=data.get('messages', []),
            max_new_tokens=data.get('max_tokens', 2048),
            do_sample=True,  # sampling must be enabled for temperature/top_p to apply
            temperature=data.get('temperature', 0.7),
            top_p=data.get('top_p', 0.95),
        )
        # The chat pipeline echoes the whole conversation; the last message
        # is the newly generated assistant reply
        return jsonify({"status": "success", "output": outputs[0]["generated_text"][-1]})
    except Exception as e:
        return jsonify({"status": "error", "message": str(e)}), 500
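
# A sketch of the request body this endpoint expects, assuming an
# OpenAI-style chat format (the keys mirror the .get() calls above;
# the example values are illustrative, not from the original):
#
#   POST /chat
#   {
#     "messages": [{"role": "user", "content": "Hello"}],
#     "max_tokens": 256,
#     "temperature": 0.7,
#     "top_p": 0.95
#   }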
def main():
    # Bind to all interfaces so the server is reachable from outside the container
    app.run(host='0.0.0.0', port=7051)

if __name__ == "__main__":
    main()
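
# Example client call, a minimal sketch assuming the host/port configured in
# main() above ("requests" is not a dependency of this module):
#
#   import requests
#   resp = requests.post(
#       "http://localhost:7051/chat",
#       json={"messages": [{"role": "user", "content": "Hello"}]},
#   )
#   print(resp.json())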