| from flask import Flask, request, jsonify |
| import spaces |
| import os |
| import json |
| from huggingface_hub import InferenceClient, login |
|
|
| |
app = Flask(__name__)


# Authenticate with the Hugging Face Hub and build the inference client once
# at import time so every request reuses the same client.
api_key = os.getenv("UCODE_SECRET")
if not api_key:
    # login(None) falls back to an interactive token prompt, which hangs a
    # headless server — fail fast with an actionable message instead.
    raise RuntimeError("UCODE_SECRET environment variable is not set")
login(api_key)
client = InferenceClient("meta-llama/Meta-Llama-3-70B-Instruct")
|
|
@app.route('/chat', methods=['POST'])
@spaces.GPU()
def chat_completion():
    """Handle POST /chat: run a streamed chat completion and return the full text.

    Expects a JSON body with:
        user_input (str): the user's prompt (default "").
        max_tokens (int): generation cap (default 512).
        temperature (float): sampling temperature (default 0.7).
        top_p (float): nucleus-sampling threshold (default 0.95).

    Returns JSON {"status": "success", "output": <text>} on success, or
    {"status": "error", "message": <reason>} with HTTP 500 on failure.
    """
    # silent=True avoids Flask raising 400/415 before our own error handling
    # when the body is missing or not JSON.
    data = request.get_json(silent=True) or {}
    user_input = data.get('user_input', '')
    max_tokens = data.get('max_tokens', 512)
    temperature = data.get('temperature', 0.7)
    top_p = data.get('top_p', 0.95)

    try:
        response = ""
        # chat_completion expects a list of role/content message dicts, not a
        # bare string.
        for message in client.chat_completion(
            [{"role": "user", "content": user_input}],
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            # Streamed deltas are objects, not dicts; content may be None on
            # the final chunk, hence the `or ""` fallback.
            token = message.choices[0].delta.content or ""
            response += token

        return jsonify({"status": "success", "output": response})
    except Exception as e:
        # Surface the failure as a server error instead of a 200 response.
        return jsonify({"status": "error", "message": str(e)}), 500
|
|
def main(host='localhost', port=3000):
    """Start the Flask development server.

    Args:
        host: interface to bind; the default preserves the original
            localhost-only binding.
        port: TCP port to listen on (default 3000).
    """
    app.run(host=host, port=port)


if __name__ == "__main__":
    main()