from flask import Flask, jsonify, request
from flask_cors import CORS
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
import os

app = Flask(__name__)
CORS(app)

# --- 🧠 MODEL SETUP (DeepSeek Coder) ---
# We use TheBloke's GGUF quantization, which runs on CPU.
REPO_ID = "TheBloke/DeepSeek-Coder-6.7B-Instruct-GGUF"
FILENAME = "deepseek-coder-6.7b-instruct.Q4_K_M.gguf"

print("📥 Downloading AI Model... (Pehli baar 5 min lagega)")
try:
    # Download the model weights from Hugging Face into the local cache
    # (only the first run actually downloads; later runs hit the cache).
    model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)

    # Load the model. n_ctx=2048 gives enough context for coding prompts;
    # n_threads=2 because the free tier only provides 2 CPU cores.
    llm = Llama(model_path=model_path, n_ctx=2048, n_threads=2)
    print("✅ DeepSeek Coder Loaded Successfully!")
except Exception as e:
    # Keep the server up even if the model fails to load; /chat reports 503.
    print(f"❌ Error Loading Model: {e}")
    llm = None


# --- CHAT LOGIC ---
@app.route('/')
def home():
    """Health-check endpoint: confirms the service is reachable."""
    return "🤖 DEEPSEEK CODER (16GB RAM) IS LIVE ON HUGGING FACE!"


@app.route('/chat', methods=['POST'])
def chat():
    """Run one instruction-following completion.

    Expects JSON ``{"message": "..."}`` and returns ``{"reply": "..."}``.
    Returns 503 while the model is unavailable, 400 for a missing message,
    and 500 if inference itself fails.
    """
    if not llm:
        # 503 so clients can tell "retry later" apart from a normal reply.
        return jsonify({"error": "Model abhi load ho raha hai, 1 min ruk kar try karo."}), 503

    # get_json(silent=True) returns None instead of raising on a bad/missing
    # body; fall back to {} so .get() is always safe.
    data = request.get_json(silent=True) or {}
    user_msg = data.get('message', '')
    if not user_msg:
        return jsonify({"error": "Missing 'message' in request body."}), 400

    # DeepSeek-Instruct prompt format: the Instruction/Response markers tell
    # the model where the user request ends and its answer should begin.
    prompt = f"""### Instruction:
You are an expert programmer. Write code or answer the following question:
{user_msg}

### Response:"""

    try:
        output = llm(
            prompt,
            max_tokens=512,             # cap on generated length
            temperature=0.2,            # low temperature => accurate, non-creative code
            stop=["### Instruction:"],  # stop before the model invents a new turn
        )
        reply = output['choices'][0]['text']
        return jsonify({"reply": reply})
    except Exception as e:
        return jsonify({"error": str(e)}), 500


if __name__ == '__main__':
    # Bind to all interfaces on port 7860 (Hugging Face Spaces default).
    app.run(host='0.0.0.0', port=7860)