"""Minimal Flask service exposing a text-generation endpoint backed by a
Hugging Face transformers pipeline (model: UCODE/agent-llama).

Module import has side effects: it creates the Flask app and loads the
model pipeline onto CUDA using the HF token from the `ai` env var.
"""
from flask import Flask, request, jsonify
from huggingface_hub import login
import spaces
import transformers
import torch
import os

# Initialize Flask app
app = Flask(__name__)

# HF access token comes from the environment; None is fine for public models.
api_key = os.getenv("ai")

model_id = "UCODE/agent-llama"

# Loaded once at import time; bfloat16 on CUDA.
# NOTE(review): assumes a CUDA device is available — confirm in deployment.
pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device="cuda",
    token=api_key,
)


@app.route('/chat', methods=['POST'])
@spaces.GPU(enable_queue=True)
def chat_completion():
    """Run chat-style text generation.

    Expects a JSON body with optional keys:
        messages      -- chat messages passed straight to the pipeline
        max_tokens    -- generation cap (default 2048)
        temperature   -- sampling temperature (default 0.7)
        top_p         -- nucleus sampling (default 0.95)

    Returns JSON {"status": "success", "output": ...} on success, or
    {"status": "error", "message": ...} with HTTP 500 on failure.
    """
    # silent=True avoids a 400 abort on a missing/non-JSON body; fall back
    # to an empty dict so the .get() defaults below apply uniformly.
    data = request.get_json(silent=True) or {}
    try:
        app.logger.debug("chat messages: %s", data.get('messages', []))
        outputs = pipeline(
            text_inputs=data.get('messages', []),
            max_new_tokens=data.get('max_tokens', 2048),
            temperature=data.get('temperature', 0.7),
            top_p=data.get('top_p', 0.95),
        )
        # Pipeline returns a list; the last generated_text entry is the
        # assistant's newest message.
        return jsonify({"status": "success", "output": outputs[0]["generated_text"][-1]})
    except Exception as e:
        # Top-level service boundary: log the traceback, report failure
        # with a proper 500 so clients can distinguish errors.
        app.logger.exception("generation failed")
        return jsonify({"status": "error", "message": str(e)}), 500


def main():
    """Start the development server on all interfaces, port 7051."""
    app.run(host='0.0.0.0', port=7051)


if __name__ == "__main__":
    main()