# app.py (2.32 kB), as shown in the Hugging Face file viewer.
# Last commit: "Update app.py" by Kelvin000010191 (9144104, verified).
from flask import Flask, request, jsonify
from flask_cors import CORS
import os
import requests
# Flask application object; the route decorators below register handlers on it.
app = Flask(__name__)
# Enable cross-origin requests so browser-based clients (e.g. an Acode
# terminal) can call this API from a different origin.
CORS(app)

# Hugging Face access token read from the environment (e.g. a Space secret).
# This is None when the HF_TOKEN variable is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Model repository queried by the chat endpoint, and the inference URL
# derived from it.
MODEL_REPO = "unsloth/mistral-7b-instruct-v0.3-bnb-4bit"
API_URL = "https://api-inference.huggingface.co/models/" + MODEL_REPO
@app.route("/", methods=["GET"])
def home():
    """Return the plain-text status banner served at the site root."""
    banner = "Krypton-1 Core Unsloth Node Active."
    return banner
# Upper bound (seconds) on the upstream call so a stalled inference request
# cannot block a worker forever; requests has no timeout by default.
_HF_TIMEOUT_SECONDS = 60


def _reply_from_inference_result(res_data):
    """Translate a decoded inference response body into the reply value.

    Args:
        res_data: The JSON-decoded upstream response body (any JSON type).

    Returns:
        The value to place under the "reply" key of the client response.
    """
    if isinstance(res_data, dict) and "error" in res_data:
        err_msg = res_data.get("error", "")
        # The error field is not guaranteed to be a string; normalise it
        # before doing a case-insensitive search.
        err_text = err_msg if isinstance(err_msg, str) else str(err_msg)
        if "loading" in err_text.lower():
            # estimated_time may be missing, null or non-numeric; fall back
            # to the same 20 s default used when the key is absent.
            try:
                est_time = round(float(res_data.get("estimated_time", 20)))
            except (TypeError, ValueError, OverflowError):
                est_time = 20
            return f"SYS_BOOT: Krypton-1 is loading on Hugging Face clusters. Activation wait time: {est_time}s. Try hitting RUN again in a moment!"
        return f"SYS_ALERT: API Error. Details: {err_text}"

    # A successful generation is expected to be a non-empty list whose first
    # element is an object carrying "generated_text".
    if isinstance(res_data, list) and res_data and isinstance(res_data[0], dict):
        return res_data[0].get("generated_text", "Data packet empty.")

    return f"SYS_ALERT: Unexpected API data layout format. Received: {str(res_data)}"


@app.route("/api/chat", methods=["POST"])
def chat():
    """Proxy a chat prompt to the configured inference endpoint.

    Expects a JSON object body with a "prompt" field. The prompt is wrapped
    in the "<s>[INST] ... [/INST]" template and posted to API_URL.

    Returns:
        A JSON response of the form {"reply": <text>}. Failures (bad input,
        upstream errors, network problems) are reported inside "reply"
        rather than raised, so the client always receives a JSON body.
    """
    try:
        # silent=True yields None instead of raising on a missing or
        # malformed JSON body, so we can answer with a clear message.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({"reply": "SYS_ALERT: Request body must be a JSON object with a 'prompt' field."})

        user_prompt = data.get("prompt", "")
        if user_prompt is None or not str(user_prompt).strip():
            return jsonify({"reply": "SYS_ALERT: Empty prompt. Send a non-empty 'prompt' field."})

        headers = {"Content-Type": "application/json"}
        # Only attach credentials when a token is configured; otherwise the
        # header would literally read "Bearer None".
        if HF_TOKEN:
            headers["Authorization"] = f"Bearer {HF_TOKEN}"

        # Payload layout: prompt text under "inputs", generation settings
        # under "parameters".
        payload = {
            "inputs": f"<s>[INST] {user_prompt} [/INST]",
            "parameters": {
                "max_new_tokens": 250,
                "temperature": 0.7,
                "return_full_text": False,
            },
        }

        response = requests.post(
            API_URL,
            headers=headers,
            json=payload,
            timeout=_HF_TIMEOUT_SECONDS,
        )
        res_data = response.json()
        return jsonify({"reply": _reply_from_inference_result(res_data)})
    except Exception as e:
        # Request boundary: record the traceback server-side, then keep the
        # contract of always answering with a JSON "reply".
        app.logger.exception("Chat proxy request failed")
        return jsonify({"reply": f"SYS_ERR: Proxy pipeline broken. {str(e)}"})
# Start Flask's built-in server only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    # Listen on all network interfaces, port 7860.
    app.run(port=7860, host="0.0.0.0")