|
|
from flask import Flask, request, jsonify |
|
|
import spaces |
|
|
import os |
|
|
import json |
|
|
from huggingface_hub import InferenceClient, login |
|
|
|
|
|
|
|
|
# Flask application instance; the /chat route below is registered on it.
app = Flask(__name__)


# Hugging Face API token, read from the "ai" environment variable.
# NOTE(review): "ai" is an unusual env-var name — confirm it is actually set
# in the deployment environment; otherwise InferenceClient gets token=None.
api_key = os.getenv("ai")


# Shared inference client pinned to Llama-3-70B-Instruct, reused across all
# incoming /chat requests.
client = InferenceClient("meta-llama/Meta-Llama-3-70B-Instruct",token=api_key)
|
|
|
|
|
@app.route('/chat', methods=['POST'])
@spaces.GPU()
def chat_completion():
    """Handle POST /chat: stream a chat completion and return the full text.

    Expected JSON body (all keys optional):
        user_input  -- list of chat messages forwarded to the model
        max_tokens  -- generation cap (default 2048)
        temperature -- sampling temperature (default 0.7)
        top_p       -- nucleus-sampling threshold (default 0.95)

    Returns ``{"status": "success", "output": <text>}`` on success,
    ``{"status": "error", "message": <reason>}`` with HTTP 400 on a bad
    request or HTTP 500 when the upstream inference call fails.
    """
    # request.json is None (or raises, depending on Flask version) when the
    # body is missing or not JSON; fall back to an empty dict instead of
    # crashing on data.get below.
    data = request.get_json(silent=True) or {}

    user_input = data.get('user_input', [])
    max_tokens = data.get('max_tokens', 2048)
    temperature = data.get('temperature', 0.7)
    top_p = data.get('top_p', 0.95)

    # An empty message list would only produce a confusing upstream error;
    # reject it up front with a proper 400.
    if not user_input:
        return jsonify({"status": "error",
                        "message": "user_input must be a non-empty list of messages"}), 400

    print(f"Received user_input: {user_input}")
    print(f"max_tokens: {max_tokens}, temperature: {temperature}, top_p: {top_p}")

    try:
        response = ""
        for message in client.chat_completion(
            user_input,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            # Streamed deltas may carry None content on role/finish chunks;
            # coalesce to "" so the concatenation below cannot raise TypeError.
            token = message.choices[0].delta.get("content", "") or ""
            response += token

        return jsonify({"status": "success", "output": response})
    except Exception as e:
        # Surface upstream failures as HTTP 500 rather than a misleading 200.
        return jsonify({"status": "error", "message": str(e)}), 500
|
|
|
|
|
|
|
|
def main():
    """Start the Flask development server.

    Host and port can be overridden with the HOST and PORT environment
    variables; the defaults (0.0.0.0:7050) preserve the original behavior.
    """
    host = os.getenv("HOST", "0.0.0.0")
    port = int(os.getenv("PORT", "7050"))
    app.run(host=host, port=port)
|
|
|
|
|
# Run the development server only when this module is executed directly,
# not when it is imported (e.g. by a WSGI server).
if __name__ == "__main__":


    main()