"""Flask proxy that forwards chat prompts to the Hugging Face Inference API."""

import os

import requests
from flask import Flask, jsonify, request
from flask_cors import CORS

app = Flask(__name__)
# Enable CORS on the app so browser-based clients served from another origin
# can call these routes.
CORS(app)

# API token read from the HF_TOKEN environment variable (e.g. a Space secret).
# It is None when the variable is not set.
HF_TOKEN = os.getenv("HF_TOKEN")
MODEL_REPO = "unsloth/mistral-7b-instruct-v0.3-bnb-4bit"
API_URL = f"https://api-inference.huggingface.co/models/{MODEL_REPO}"

# Upper bound (seconds) on the upstream call. Without a timeout, requests
# waits forever and a stalled upstream would pin a worker indefinitely.
REQUEST_TIMEOUT_SECONDS = 60


def _build_payload(user_prompt):
    """Wrap *user_prompt* in the instruct template and generation settings."""
    return {
        "inputs": f"[INST] {user_prompt} [/INST]",
        "parameters": {
            "max_new_tokens": 250,
            "temperature": 0.7,
            "return_full_text": False,
        },
    }


def _estimated_wait(raw, default=20):
    """Round the upstream wait estimate, using *default* if it is unusable."""
    try:
        return round(raw)
    except (TypeError, ValueError, OverflowError):
        # Missing (None), non-numeric, NaN or infinite estimate.
        return default


def _reply_from_api(res_data):
    """Turn the decoded upstream JSON into the reply text sent to the client.

    Handles three shapes: an error object (with a special message when the
    error text mentions the model is loading), a non-empty list whose first
    element is a dict carrying ``generated_text``, and anything else, which
    is reported verbatim as an unexpected layout.
    """
    if isinstance(res_data, dict) and "error" in res_data:
        err_msg = res_data.get("error", "")
        if not isinstance(err_msg, str):
            # The error field is not guaranteed to be a string; normalise it
            # so the substring check below cannot raise.
            err_msg = str(err_msg)
        if "loading" in err_msg.lower():
            est_time = _estimated_wait(res_data.get("estimated_time"))
            return (
                "SYS_BOOT: Krypton-1 is loading on Hugging Face clusters. "
                f"Activation wait time: {est_time}s. "
                "Try hitting RUN again in a moment!"
            )
        return f"SYS_ALERT: API Error. Details: {err_msg}"

    if isinstance(res_data, list) and res_data and isinstance(res_data[0], dict):
        return res_data[0].get("generated_text", "Data packet empty.")

    return (
        "SYS_ALERT: Unexpected API data layout format. "
        f"Received: {str(res_data)}"
    )


@app.route("/", methods=["GET"])
def home():
    """Return a plain-text status message."""
    return "Krypton-1 Core Unsloth Node Active."


@app.route("/api/chat", methods=["POST"])
def chat():
    """Forward the posted prompt to the model endpoint and relay its reply.

    Expects a JSON object body with an optional ``"prompt"`` field (defaults
    to an empty string). Always responds with JSON of the form
    ``{"reply": <text>}``; failures are reported inside ``reply`` rather than
    through the HTTP status code.
    """
    try:
        # silent=True yields None instead of raising on a missing or
        # malformed JSON body, so the problem can be reported clearly.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify(
                {"reply": "SYS_ALERT: Request body must be a JSON object."}
            )
        user_prompt = data.get("prompt", "")

        headers = {"Content-Type": "application/json"}
        if HF_TOKEN:
            # Only attach credentials when a token is configured; otherwise
            # the header would carry the literal text "Bearer None".
            headers["Authorization"] = f"Bearer {HF_TOKEN}"

        response = requests.post(
            API_URL,
            headers=headers,
            json=_build_payload(user_prompt),
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        res_data = response.json()
        return jsonify({"reply": _reply_from_api(res_data)})
    except Exception as e:
        # Top-level boundary: keep the traceback in the server log and still
        # hand the client a well-formed reply.
        app.logger.exception("Chat proxy request failed")
        return jsonify({"reply": f"SYS_ERR: Proxy pipeline broken. {str(e)}"})


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)