# Q3 / app.py
# Uploaded by Gaston895 (revision 1cd3db1, verified) — Hugging Face Space app.
import os
import threading
import time
from flask import Flask, request, jsonify
from flask_cors import CORS
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

app = Flask(__name__)
CORS(app)  # allow cross-origin requests so browser front-ends can call this API

# Fallback to a smaller, more stable model if the 14B model fails
# This is a proven stable configuration
MODEL_REPO = "microsoft/Phi-3-mini-4k-instruct-gguf"
MODEL_FILE = "Phi-3-mini-4k-instruct-q4.gguf"

# Shared state: written by the background loader thread, read by the request
# handlers. `model` stays None until loading succeeds; `loading_error` holds
# the failure message (string) if it does not.
# NOTE(review): `time` appears unused in this file — confirm before removing.
model = None
loading_error = None
def load_model():
    """Fetch the GGUF weights from the Hugging Face Hub and bring up llama-cpp.

    Intended to run on a background thread; results are published through the
    module-level `model` and `loading_error` globals so the HTTP handlers can
    report readiness without blocking.
    """
    global model, loading_error
    try:
        # HF_TOKEN is only required when the Space / model repo is private.
        hub_token = os.environ.get("HF_TOKEN")
        print(f"📥 Downloading fallback model from: {MODEL_REPO}/{MODEL_FILE}...")
        # Pull the single GGUF file out of the model repository (Hub-cached).
        gguf_path = hf_hub_download(
            repo_id=MODEL_REPO,
            filename=MODEL_FILE,
            token=hub_token,
        )
        print(f"✅ Model downloaded to: {gguf_path}")
        print("🏗️ Initializing model engine (llama-cpp)...")
        # Deliberately conservative llama-cpp settings for maximum stability.
        model = Llama(
            model_path=gguf_path,
            n_ctx=2048,  # Phi-3 mini works well with 2K context
            n_threads=2,
            n_batch=128,
            verbose=False,
        )
        print("✅ Fallback model loaded successfully!")
    except Exception as e:
        # Record the failure so /health can surface it instead of crashing.
        loading_error = str(e)
        print(f"❌ Error loading fallback model: {e}")


# Kick off download/initialization without blocking Flask startup.
threading.Thread(target=load_model, daemon=True).start()
@app.route('/', methods=['GET'])
def index():
    """Root endpoint with API documentation"""
    # Readiness comes from the module-level `model` global, which the
    # background loader thread sets once llama-cpp has initialized.
    fields = {
        "status_class": "online" if model else "loading",
        "status_text": "✅ Online and Ready" if model else "⏳ Loading Model...",
        "repo": MODEL_REPO,
    }
    # %-formatting (not .format / f-string) so the CSS braces need no escaping.
    template = """
<!DOCTYPE html>
<html>
<head>
<title>OpenGSSTEC AI API - Fallback Mode</title>
<style>
body { font-family: Arial, sans-serif; max-width: 800px; margin: 50px auto; padding: 20px; }
h1 { color: #333; }
.endpoint { background: #f5f5f5; padding: 15px; margin: 10px 0; border-radius: 5px; }
code { background: #e0e0e0; padding: 2px 6px; border-radius: 3px; }
.status { padding: 10px; border-radius: 5px; margin: 20px 0; }
.online { background: #d4edda; color: #155724; }
.loading { background: #fff3cd; color: #856404; }
.warning { background: #f8d7da; color: #721c24; }
</style>
</head>
<body>
<h1>🤖 OpenGSSTEC AI API - Fallback Mode</h1>
<div class="status warning">
⚠️ Running in fallback mode with Phi-3-mini due to 14B model instability
</div>
<div class="status %(status_class)s">
Status: %(status_text)s
</div>
<h2>Available Endpoints</h2>
<div class="endpoint">
<h3>GET /health</h3>
<p>Check API health and model status</p>
</div>
<div class="endpoint">
<h3>POST /chat</h3>
<p>Send chat messages to the AI model</p>
</div>
<h2>Model Information</h2>
<ul>
<li><strong>Model:</strong> Phi-3-mini-4k-instruct (Q4)</li>
<li><strong>Repository:</strong> %(repo)s</li>
<li><strong>Parameters:</strong> 3.8B</li>
<li><strong>Context Length:</strong> 2048 tokens</li>
<li><strong>Status:</strong> Fallback mode - stable and reliable</li>
</ul>
</body>
</html>
"""
    return template % fields
@app.route('/health', methods=['GET'])
def health():
    """Report model readiness, the configured repo/file, and any load error."""
    payload = {
        "status": "online" if model else "loading",
        "repo": MODEL_REPO,
        "file": MODEL_FILE,
        "mode": "fallback",
        "error": loading_error,  # None unless the background load failed
    }
    return jsonify(payload)
@app.route('/chat', methods=['POST'])
def chat():
    """OpenAI-style chat completion endpoint.

    Expects a JSON body like {"messages": [{"role": ..., "content": ...}, ...]}
    and returns {"choices": [{"message": {...}, "finish_reason": "stop"}]}.
    Responds 503 while the model is still loading, 400 on a malformed
    `messages` field, and 500 on inference errors.
    """
    if not model:
        return jsonify({"error": "Model still loading"}), 503
    try:
        # BUG FIX: request.json raises (or returns None) for non-JSON bodies,
        # which previously surfaced as a confusing 500 from data.get(...).
        # get_json(silent=True) tolerates bad/missing bodies.
        data = request.get_json(silent=True) or {}
        messages = data.get('messages', [])
        if not isinstance(messages, list):
            return jsonify({"error": "'messages' must be a list"}), 400
        prompt = _build_prompt(messages)
        output = model(
            prompt,
            max_tokens=300,
            temperature=0.7,
            top_p=0.9,
            stop=["User:", "System:"],  # stop before the model speaks for others
            echo=False
        )
        response_text = output["choices"][0]["text"].strip()
        return jsonify({
            "choices": [{
                "message": {"role": "assistant", "content": response_text},
                "finish_reason": "stop"
            }]
        })
    except Exception as e:
        return jsonify({"error": str(e)}), 500


def _build_prompt(messages):
    """Flatten chat messages into a plain role-prefixed prompt for Phi-3.

    Unknown roles are silently skipped. A trailing "Assistant: " cues the
    model to respond; the result is clipped to its last 1500 characters to
    stay well within the 2K-token context window.
    """
    prompt = ""
    for msg in messages:
        role = msg.get('role', 'user')
        content = msg.get('content', '')
        if role == 'system':
            prompt += f"System: {content}\n"
        elif role == 'user':
            prompt += f"User: {content}\n"
        elif role == 'assistant':
            prompt += f"Assistant: {content}\n"
    prompt += "Assistant: "
    # Keep prompt reasonable
    if len(prompt) > 1500:
        prompt = prompt[-1500:]
    return prompt
if __name__ == '__main__':
    # Hugging Face Spaces injects PORT; 7860 is the Spaces default.
    listen_port = int(os.environ.get('PORT', 7860))
    app.run(host='0.0.0.0', port=listen_port)