# Q3 / app.py
# Uploaded by Gaston895 (revision 1cd3db1, verified) — Hugging Face Space app.
import os
import threading
import time
from flask import Flask, request, jsonify
from flask_cors import CORS
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

app = Flask(__name__)
CORS(app)  # allow cross-origin requests so browser front-ends can call this API

# Fallback to a smaller, more stable model if the 14B model fails
# This is a proven stable configuration
MODEL_REPO = "microsoft/Phi-3-mini-4k-instruct-gguf"
MODEL_FILE = "Phi-3-mini-4k-instruct-q4.gguf"

# Shared state: written by the background loader thread, read by the request
# handlers. `model` stays None until loading succeeds; `loading_error` holds
# the failure message (string) if it does not.
# NOTE(review): `time` appears unused in this file — confirm before removing.
model = None
loading_error = None
def load_model():
    """Fetch the GGUF weights from the Hugging Face Hub and bring up llama-cpp.

    Intended to run on a background thread; results are published through the
    module-level `model` and `loading_error` globals so the HTTP handlers can
    report readiness without blocking.
    """
    global model, loading_error
    try:
        # HF_TOKEN is only required when the Space / model repo is private.
        hub_token = os.environ.get("HF_TOKEN")
        print(f"📥 Downloading fallback model from: {MODEL_REPO}/{MODEL_FILE}...")
        # Pull the single GGUF file out of the model repository (Hub-cached).
        gguf_path = hf_hub_download(
            repo_id=MODEL_REPO,
            filename=MODEL_FILE,
            token=hub_token,
        )
        print(f"✅ Model downloaded to: {gguf_path}")
        print("🏗️ Initializing model engine (llama-cpp)...")
        # Deliberately conservative llama-cpp settings for maximum stability.
        model = Llama(
            model_path=gguf_path,
            n_ctx=2048,  # Phi-3 mini works well with 2K context
            n_threads=2,
            n_batch=128,
            verbose=False,
        )
        print("✅ Fallback model loaded successfully!")
    except Exception as e:
        # Record the failure so /health can surface it instead of crashing.
        loading_error = str(e)
        print(f"❌ Error loading fallback model: {e}")


# Kick off download/initialization without blocking Flask startup.
threading.Thread(target=load_model, daemon=True).start()
@app.route('/', methods=['GET'])
def index():
    """Root endpoint with API documentation"""
    # Readiness comes from the module-level `model` global, which the
    # background loader thread sets once llama-cpp has initialized.
    fields = {
        "status_class": "online" if model else "loading",
        "status_text": "✅ Online and Ready" if model else "⏳ Loading Model...",
        "repo": MODEL_REPO,
    }
    # %-formatting (not .format / f-string) so the CSS braces need no escaping.
    template = """
<!DOCTYPE html>
<html>
<head>
<title>OpenGSSTEC AI API - Fallback Mode</title>
<style>
body { font-family: Arial, sans-serif; max-width: 800px; margin: 50px auto; padding: 20px; }
h1 { color: #333; }
.endpoint { background: #f5f5f5; padding: 15px; margin: 10px 0; border-radius: 5px; }
code { background: #e0e0e0; padding: 2px 6px; border-radius: 3px; }
.status { padding: 10px; border-radius: 5px; margin: 20px 0; }
.online { background: #d4edda; color: #155724; }
.loading { background: #fff3cd; color: #856404; }
.warning { background: #f8d7da; color: #721c24; }
</style>
</head>
<body>
<h1>🤖 OpenGSSTEC AI API - Fallback Mode</h1>
<div class="status warning">
⚠️ Running in fallback mode with Phi-3-mini due to 14B model instability
</div>
<div class="status %(status_class)s">
Status: %(status_text)s
</div>
<h2>Available Endpoints</h2>
<div class="endpoint">
<h3>GET /health</h3>
<p>Check API health and model status</p>
</div>
<div class="endpoint">
<h3>POST /chat</h3>
<p>Send chat messages to the AI model</p>
</div>
<h2>Model Information</h2>
<ul>
<li><strong>Model:</strong> Phi-3-mini-4k-instruct (Q4)</li>
<li><strong>Repository:</strong> %(repo)s</li>
<li><strong>Parameters:</strong> 3.8B</li>
<li><strong>Context Length:</strong> 2048 tokens</li>
<li><strong>Status:</strong> Fallback mode - stable and reliable</li>
</ul>
</body>
</html>
"""
    return template % fields
@app.route('/health', methods=['GET'])
def health():
    """Report model readiness, the configured repo/file, and any load error."""
    payload = {
        "status": "online" if model else "loading",
        "repo": MODEL_REPO,
        "file": MODEL_FILE,
        "mode": "fallback",
        "error": loading_error,  # None unless the background load failed
    }
    return jsonify(payload)
@app.route('/chat', methods=['POST'])
def chat():
    """OpenAI-style chat completion endpoint.

    Expects a JSON body like {"messages": [{"role": ..., "content": ...}, ...]}
    and returns {"choices": [{"message": {...}, "finish_reason": "stop"}]}.
    Responds 503 while the model is still loading, 400 on a malformed
    `messages` field, and 500 on inference errors.
    """
    if not model:
        return jsonify({"error": "Model still loading"}), 503
    try:
        # BUG FIX: request.json raises (or returns None) for non-JSON bodies,
        # which previously surfaced as a confusing 500 from data.get(...).
        # get_json(silent=True) tolerates bad/missing bodies.
        data = request.get_json(silent=True) or {}
        messages = data.get('messages', [])
        if not isinstance(messages, list):
            return jsonify({"error": "'messages' must be a list"}), 400
        prompt = _build_prompt(messages)
        output = model(
            prompt,
            max_tokens=300,
            temperature=0.7,
            top_p=0.9,
            stop=["User:", "System:"],  # stop before the model speaks for others
            echo=False
        )
        response_text = output["choices"][0]["text"].strip()
        return jsonify({
            "choices": [{
                "message": {"role": "assistant", "content": response_text},
                "finish_reason": "stop"
            }]
        })
    except Exception as e:
        return jsonify({"error": str(e)}), 500


def _build_prompt(messages):
    """Flatten chat messages into a plain role-prefixed prompt for Phi-3.

    Unknown roles are silently skipped. A trailing "Assistant: " cues the
    model to respond; the result is clipped to its last 1500 characters to
    stay well within the 2K-token context window.
    """
    prompt = ""
    for msg in messages:
        role = msg.get('role', 'user')
        content = msg.get('content', '')
        if role == 'system':
            prompt += f"System: {content}\n"
        elif role == 'user':
            prompt += f"User: {content}\n"
        elif role == 'assistant':
            prompt += f"Assistant: {content}\n"
    prompt += "Assistant: "
    # Keep prompt reasonable
    if len(prompt) > 1500:
        prompt = prompt[-1500:]
    return prompt
if __name__ == '__main__':
    # Hugging Face Spaces injects PORT; 7860 is the Spaces default.
    listen_port = int(os.environ.get('PORT', 7860))
    app.run(host='0.0.0.0', port=listen_port)