|
|
from flask import Flask, request, jsonify |
|
|
from huggingface_hub import login |
|
|
import spaces |
|
|
import transformers |
|
|
import torch |
|
|
import os |
|
|
|
|
|
|
|
|
# Flask application object serving the /chat endpoint defined below.
app = Flask(__name__)


# Hugging Face access token, read from the "ai" environment variable.
# NOTE(review): may be None if the variable is unset — the pipeline call
# below would then load the model unauthenticated; confirm the deployment
# environment actually defines "ai".
api_key = os.getenv("ai")


# Hugging Face model repository to serve.
model_id = "UCODE/agent-llama"


# Module-level text-generation pipeline, loaded once at import time and
# shared by all requests: bfloat16 weights, placed on the CUDA device.
pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device="cuda",
    token=api_key
)
|
|
|
|
|
@app.route('/chat', methods=['POST'])
@spaces.GPU(enable_queue=True)
def chat_completion():
    """Handle POST /chat: run the shared text-generation pipeline.

    Expects a JSON request body with optional keys:
        messages    -- chat messages passed to the model (default: [])
        max_tokens  -- max new tokens to generate (default: 2048)
        temperature -- sampling temperature (default: 0.7)
        top_p       -- nucleus-sampling cutoff (default: 0.95)

    Returns JSON {"status": "success", "output": ...} on success, or
    {"status": "error", "message": ...} if generation fails.
    """
    # get_json(silent=True) returns None (instead of raising) for a missing
    # or malformed JSON body; fall back to {} so every .get() below is safe.
    # The original used request.json, which left data as None for non-JSON
    # requests and produced a confusing AttributeError on data.get(...).
    data = request.get_json(silent=True) or {}

    try:
        print(data.get('messages', []))
        outputs = pipeline(
            text_inputs=data.get('messages', []),
            max_new_tokens=data.get('max_tokens', 2048),
            temperature=data.get('temperature', 0.7),
            top_p=data.get('top_p', 0.95)
        )
        # The pipeline returns a list of dicts; "generated_text" holds the
        # conversation and [-1] selects the newly generated final message.
        return jsonify({"status": "success", "output": outputs[0]["generated_text"][-1]})
    except Exception as e:
        # Broad catch is deliberate at this HTTP boundary: report the
        # failure to the client instead of an opaque 500 error page.
        return jsonify({"status": "error", "message": str(e)})
|
|
|
|
|
|
|
|
def main():
    """Start the Flask development server.

    The bind port can be overridden via the PORT environment variable and
    defaults to 7051 (the original hard-coded value). Binds to all
    interfaces, as required for containerized / Spaces deployments.
    """
    app.run(host='0.0.0.0', port=int(os.getenv('PORT', '7051')))
|
|
|
|
|
if __name__ == "__main__": |
|
|
main() |
|
|
|