| import os
|
| import threading
|
| import time
|
| from flask import Flask, request, jsonify
|
| from flask_cors import CORS
|
| from llama_cpp import Llama
|
| from huggingface_hub import hf_hub_download
|
|
|
app = Flask(__name__)

CORS(app)  # allow cross-origin browser clients (e.g. a separate frontend) to call this API

# Fallback model: a small quantized (Q4) Phi-3-mini GGUF hosted on the Hugging Face Hub.
MODEL_REPO = "microsoft/Phi-3-mini-4k-instruct-gguf"

MODEL_FILE = "Phi-3-mini-4k-instruct-q4.gguf"

# Populated by the background loader thread (see load_model below).
model = None          # Llama instance once loading succeeds; None while loading/failed
loading_error = None  # error message string if loading failed, surfaced via /health
|
|
|
def load_model():
    """Download the fallback GGUF model and initialize the llama-cpp engine.

    Runs in a background daemon thread (started at module level) so the web
    server can come up immediately. On success sets the module-global
    ``model``; on failure records the message in ``loading_error`` so the
    /health endpoint can report it instead of crashing the process.
    """
    global model, loading_error
    try:
        # Optional auth token for gated/private repos; public repos work without it.
        token = os.environ.get("HF_TOKEN")

        print(f"Downloading fallback model from: {MODEL_REPO}/{MODEL_FILE}...")
        model_path = hf_hub_download(
            repo_id=MODEL_REPO,
            filename=MODEL_FILE,
            token=token,
        )
        print(f"Model downloaded to: {model_path}")

        print("Initializing model engine (llama-cpp)...")
        model = Llama(
            model_path=model_path,
            n_ctx=2048,    # context window; /chat caps prompts well below this
            n_threads=2,   # conservative for small shared CPU hosts
            n_batch=128,
            verbose=False,
        )
        print("Fallback model loaded successfully!")
    except Exception as e:
        # Keep the server alive; clients see the error via /health.
        loading_error = str(e)
        print(f"Error loading fallback model: {e}")
|
|
|
|
|
# Kick off the (slow) download/initialization in the background so the web
# server can start answering requests immediately; daemon=True lets the
# process exit without waiting for the loader.
threading.Thread(target=load_model, daemon=True).start()
|
|
|
@app.route('/', methods=['GET'])
def index():
    """Root endpoint: human-readable HTML page documenting the API.

    The status banner reflects the module-global ``model``: "online" once
    the background loader has finished, "loading" before that.
    """
    # Pieces that depend on current load state; concatenation (not an
    # f-string) is used because the CSS braces would otherwise need escaping.
    status_class = "online" if model else "loading"
    status_text = "Online and Ready" if model else "Loading Model..."

    html = """
<!DOCTYPE html>
<html>
<head>
    <title>OpenGSSTEC AI API - Fallback Mode</title>
    <style>
        body { font-family: Arial, sans-serif; max-width: 800px; margin: 50px auto; padding: 20px; }
        h1 { color: #333; }
        .endpoint { background: #f5f5f5; padding: 15px; margin: 10px 0; border-radius: 5px; }
        code { background: #e0e0e0; padding: 2px 6px; border-radius: 3px; }
        .status { padding: 10px; border-radius: 5px; margin: 20px 0; }
        .online { background: #d4edda; color: #155724; }
        .loading { background: #fff3cd; color: #856404; }
        .warning { background: #f8d7da; color: #721c24; }
    </style>
</head>
<body>
    <h1>OpenGSSTEC AI API - Fallback Mode</h1>
    <div class="status warning">
        Running in fallback mode with Phi-3-mini due to 14B model instability
    </div>
    <div class="status """ + status_class + """">
        Status: """ + status_text + """
    </div>

    <h2>Available Endpoints</h2>

    <div class="endpoint">
        <h3>GET /health</h3>
        <p>Check API health and model status</p>
    </div>

    <div class="endpoint">
        <h3>POST /chat</h3>
        <p>Send chat messages to the AI model</p>
    </div>

    <h2>Model Information</h2>
    <ul>
        <li><strong>Model:</strong> Phi-3-mini-4k-instruct (Q4)</li>
        <li><strong>Repository:</strong> """ + MODEL_REPO + """</li>
        <li><strong>Parameters:</strong> 3.8B</li>
        <li><strong>Context Length:</strong> 2048 tokens</li>
        <li><strong>Status:</strong> Fallback mode - stable and reliable</li>
    </ul>
</body>
</html>
"""
    return html
|
|
|
@app.route('/health', methods=['GET'])
def health():
    """Return model readiness, the configured repo/file, and any load error."""
    payload = {
        "repo": MODEL_REPO,
        "file": MODEL_FILE,
        "mode": "fallback",
        "error": loading_error,
    }
    payload["status"] = "online" if model else "loading"
    return jsonify(payload)
|
|
|
@app.route('/chat', methods=['POST'])
def chat():
    """OpenAI-style chat endpoint backed by the local llama-cpp model.

    Expects a JSON body ``{"messages": [{"role": ..., "content": ...}, ...]}``
    and returns ``{"choices": [{"message": {...}, "finish_reason": "stop"}]}``.
    Responds 503 while the model is still loading and 500 on inference errors.
    """
    if not model:
        return jsonify({"error": "Model still loading"}), 503

    try:
        # request.json raises / yields None on a missing or non-JSON body,
        # which would crash data.get below; silent=True + fallback keeps
        # malformed requests from turning into 500s with an obscure trace.
        data = request.get_json(silent=True) or {}
        messages = data.get('messages', [])

        # Flatten the chat history into a simple role-tagged transcript.
        prompt = ""
        for msg in messages:
            role = msg.get('role', 'user')
            content = msg.get('content', '')
            if role == 'system':
                prompt += f"System: {content}\n"
            elif role == 'user':
                prompt += f"User: {content}\n"
            elif role == 'assistant':
                prompt += f"Assistant: {content}\n"
        prompt += "Assistant: "

        # Crude guard against overflowing the 2048-token context window:
        # keep only the most recent tail of the transcript.
        if len(prompt) > 1500:
            prompt = prompt[-1500:]

        output = model(
            prompt,
            max_tokens=300,
            temperature=0.7,
            top_p=0.9,
            stop=["User:", "System:"],  # stop before the model invents the next turn
            echo=False,
        )

        response_text = output["choices"][0]["text"].strip()

        return jsonify({
            "choices": [{
                "message": {"role": "assistant", "content": response_text},
                "finish_reason": "stop",
            }]
        })
    except Exception as e:
        return jsonify({"error": str(e)}), 500
|
|
|
if __name__ == '__main__':
    # Container platforms (e.g. HF Spaces) inject PORT; default to 7860.
    app.run(host='0.0.0.0', port=int(os.environ.get('PORT', 7860)))