"""Minimal Flask proxy exposing a streaming Llama-3 chat endpoint at POST /chat."""

from flask import Flask, request, jsonify
import spaces
import os
import json
from huggingface_hub import InferenceClient, login

# Initialize Flask app
app = Flask(__name__)

# Load the API key and initialize the InferenceClient.
# NOTE(review): env var "ai" is an unusual name — confirm it matches the deployment config.
api_key = os.getenv("ai")
client = InferenceClient("meta-llama/Meta-Llama-3-70B-Instruct", token=api_key)


def _delta_content(delta):
    """Extract the text content from a stream delta.

    Older huggingface_hub versions yield dict-like deltas (``.get``); newer
    versions yield dataclasses with a ``.content`` attribute. Support both.
    """
    if hasattr(delta, "get"):
        return delta.get("content") or ""
    return getattr(delta, "content", None) or ""


@app.route('/chat', methods=['POST'])
@spaces.GPU()
def chat_completion():
    """Run a (streamed) chat completion and return the full text as JSON.

    Request JSON body:
        user_input  -- list of chat messages (``[{"role": ..., "content": ...}]``)
        max_tokens  -- generation cap (default 2048)
        temperature -- sampling temperature (default 0.7)
        top_p       -- nucleus-sampling threshold (default 0.95)

    Returns ``{"status": "success", "output": <text>}`` on success, or
    ``{"status": "error", "message": <reason>}`` with HTTP 500 on failure.
    """
    # silent=True keeps malformed/missing JSON from triggering Flask's own
    # HTML 400 page; we fall back to defaults instead.
    data = request.get_json(silent=True) or {}
    user_input = data.get('user_input', [])
    max_tokens = data.get('max_tokens', 2048)
    temperature = data.get('temperature', 0.7)
    top_p = data.get('top_p', 0.95)

    print(f"Received user_input: {user_input}")
    print(f"max_tokens: {max_tokens}, temperature: {temperature}, top_p: {top_p}")

    try:
        # Accumulate streamed tokens; "".join avoids quadratic += growth.
        pieces = []
        for message in client.chat_completion(
            user_input,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            pieces.append(_delta_content(message.choices[0].delta))
        response = "".join(pieces)
        return jsonify({"status": "success", "output": response})
    except Exception as e:
        # Surface failures with a proper HTTP error code instead of 200.
        return jsonify({"status": "error", "message": str(e)}), 500


def main():
    """Start the development server on all interfaces, port 7050."""
    app.run(host='0.0.0.0', port=7050)


if __name__ == "__main__":
    main()