# indexdio / app.py
# (Uploaded to HuggingFace Space by ghosthets, commit 1856082 "Update app.py".)
import flask
from flask import request, jsonify
from llama_cpp import Llama
import os
from huggingface_hub import hf_hub_download
# Flask application instance.
app = flask.Flask(__name__)

# GGUF model location on the HuggingFace Hub.
REPO_ID = "dexcommunity/indexQ4"
GGUF_FILENAME = "indexq4.gguf"

print(f"🔄 Downloading GGUF model from {REPO_ID}...")

# Fetch the GGUF weights from the Hub (cached locally after the first run).
try:
    model_path = hf_hub_download(
        repo_id=REPO_ID,
        filename=GGUF_FILENAME,
        repo_type="model",
    )
    print(f"✅ Model downloaded to: {model_path}")
except Exception as e:
    # Surface a helpful hint, then re-raise so the Space fails loudly.
    print(f"❌ Download failed: {e}")
    print("💡 Make sure your GGUF file is uploaded to HuggingFace!")
    raise

print(f"🔄 Loading GGUF model with llama.cpp...")

# Load the GGUF weights with llama-cpp-python, CPU-only settings sized for
# a HuggingFace free-tier container.
llm = Llama(
    model_path=model_path,
    n_ctx=2048,        # Context window
    n_threads=4,       # CPU threads (HF Free gives 2-4 cores)
    n_batch=512,       # Batch size for processing
    n_gpu_layers=0,    # CPU only (HF Free doesn't have GPU)
    verbose=False,
)

print(f"✅ GGUF Model loaded successfully!")
print(f"📊 Model: {GGUF_FILENAME}")
print(f"🔧 Context: 2048 tokens, Threads: 4")
@app.route('/chat', methods=['POST'])
def chat():
    """Generate a chat completion for a JSON body ``{"message": "..."}``.

    Returns:
        200 with ``{"reply": str, "tokens_used": int}`` on success,
        400 when the message is missing or the body is not valid JSON,
        500 with ``{"error": str}`` on any generation failure.
    """
    try:
        # silent=True makes get_json() return None (instead of raising) on a
        # missing/malformed JSON body, so bad client input yields the intended
        # 400 below rather than an AttributeError-driven 500.
        data = request.get_json(silent=True) or {}
        msg = data.get("message", "")
        if not msg:
            return jsonify({"error": "No message sent"}), 400

        # Gemma 2B chat template
        prompt = f"""<start_of_turn>user
{msg}<end_of_turn>
<start_of_turn>model
"""
        # Generate response with GGUF model
        response = llm(
            prompt,
            max_tokens=256,     # Max response length
            temperature=0.7,
            top_p=0.9,
            top_k=40,
            repeat_penalty=1.1,
            stop=["<end_of_turn>", "<start_of_turn>"],  # Stop sequences
            echo=False,         # Don't include prompt in output
        )

        # Extract generated text (llama.cpp returns an OpenAI-style dict).
        reply = response['choices'][0]['text'].strip()
        return jsonify({
            "reply": reply,
            "tokens_used": response['usage']['completion_tokens'],
        })
    except Exception as e:
        # Log the full traceback server-side; return only the message to the client.
        import traceback
        error_details = traceback.format_exc()
        print(f"❌ Error: {error_details}")
        return jsonify({"error": str(e)}), 500
@app.route('/health', methods=['GET'])
def health():
    """Liveness probe: report model name, backend, and device."""
    status = {
        "status": "healthy",
        "model": GGUF_FILENAME,
        "backend": "llama.cpp (GGUF)",
        "device": "CPU",
    }
    return jsonify(status)
if __name__ == "__main__":
    # Bind on all interfaces; 7860 is the conventional HuggingFace Spaces port.
    app.run(host='0.0.0.0', port=7860, debug=False, threaded=True)