from flask import Flask, request, jsonify
from huggingface_hub import login
import spaces
import transformers
import torch
import os
# Initialize Flask app
app = Flask(__name__)
# Hugging Face access token read from the "ai" environment variable;
# passed to the pipeline below to authenticate the model download.
# NOTE(review): may be None if the env var is unset — verify deployment config.
api_key = os.getenv("ai")
# Hub repo id of the model to serve.
model_id = "UCODE/agent-llama"
# Build the text-generation pipeline once at import time (model download +
# load happens here, not per request). bfloat16 weights, placed on CUDA —
# assumes a GPU is available in the runtime (Spaces GPU hardware).
pipeline = transformers.pipeline(
"text-generation",
model=model_id,
model_kwargs={"torch_dtype": torch.bfloat16},
device="cuda",
token=api_key
)
@app.route('/chat', methods=['POST'])
@spaces.GPU(enable_queue=True)
def chat_completion():
    """Handle POST /chat: run the text-generation pipeline on the request body.

    Expects a JSON object with optional keys:
      messages     - chat-style message list passed to the pipeline (default []).
      max_tokens   - max new tokens to generate (default 2048).
      temperature  - sampling temperature (default 0.7).
      top_p        - nucleus-sampling cutoff (default 0.95).

    Returns a JSON envelope: {"status": "success", "output": ...} on success,
    or {"status": "error", "message": ...} with HTTP 500 on failure.
    """
    # get_json(silent=True) returns None instead of raising on a missing or
    # malformed JSON body, so bad requests get our JSON error envelope rather
    # than Flask's default HTML 400/415 page (request.json would raise here,
    # outside any handler).
    data = request.get_json(silent=True) or {}
    try:
        messages = data.get('messages', [])
        print(messages)  # lightweight request trace
        outputs = pipeline(
            text_inputs=messages,
            max_new_tokens=data.get('max_tokens', 2048),
            temperature=data.get('temperature', 0.7),
            top_p=data.get('top_p', 0.95),
        )
        # The pipeline returns a list of dicts; with chat-style input the last
        # element of "generated_text" is the newly generated assistant message.
        return jsonify({"status": "success", "output": outputs[0]["generated_text"][-1]})
    except Exception as e:
        # Broad catch is deliberate at this route boundary. Return 500 so HTTP
        # clients can detect failure from the status code (previously errors
        # came back as 200); the JSON body is unchanged for existing callers.
        return jsonify({"status": "error", "message": str(e)}), 500
def main():
    """Start the Flask development server on all interfaces, port 7051."""
    host, port = '0.0.0.0', 7051
    app.run(host=host, port=port)


if __name__ == "__main__":
    main()
|