from flask import Flask, jsonify, request
from flask_cors import CORS
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
import os

app = Flask(__name__)
CORS(app)

# --- 🧠 MODEL SETUP (DeepSeek Coder) ---
# We use TheBloke's GGUF quantization, which runs on CPU.
REPO_ID = "TheBloke/DeepSeek-Coder-6.7B-Instruct-GGUF"
FILENAME = "deepseek-coder-6.7b-instruct.Q4_K_M.gguf"

print("📥 Downloading AI Model... (Pehli baar 5 min lagega)")
try:
    # Download the model weights from Hugging Face into the local cache
    # (only the first run actually downloads; later runs hit the cache).
    model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)

    # Load the model. n_ctx=2048 gives enough context for coding prompts;
    # n_threads=2 because the free tier only provides 2 CPU cores.
    llm = Llama(model_path=model_path, n_ctx=2048, n_threads=2)
    print("✅ DeepSeek Coder Loaded Successfully!")
except Exception as e:
    # Keep the server up even if the model fails to load; /chat reports 503.
    print(f"❌ Error Loading Model: {e}")
    llm = None


# --- CHAT LOGIC ---
@app.route('/')
def home():
    """Health-check endpoint: confirms the service is reachable."""
    return "🤖 DEEPSEEK CODER (16GB RAM) IS LIVE ON HUGGING FACE!"


@app.route('/chat', methods=['POST'])
def chat():
    """Run one instruction-following completion.

    Expects JSON ``{"message": "..."}`` and returns ``{"reply": "..."}``.
    Returns 503 while the model is unavailable, 400 for a missing message,
    and 500 if inference itself fails.
    """
    if not llm:
        # 503 so clients can tell "retry later" apart from a normal reply.
        return jsonify({"error": "Model abhi load ho raha hai, 1 min ruk kar try karo."}), 503

    # get_json(silent=True) returns None instead of raising on a bad/missing
    # body; fall back to {} so .get() is always safe.
    data = request.get_json(silent=True) or {}
    user_msg = data.get('message', '')
    if not user_msg:
        return jsonify({"error": "Missing 'message' in request body."}), 400

    # DeepSeek-Instruct prompt format: the Instruction/Response markers tell
    # the model where the user request ends and its answer should begin.
    prompt = f"""### Instruction:
You are an expert programmer. Write code or answer the following question:
{user_msg}

### Response:"""

    try:
        output = llm(
            prompt,
            max_tokens=512,             # cap on generated length
            temperature=0.2,            # low temperature => accurate, non-creative code
            stop=["### Instruction:"],  # stop before the model invents a new turn
        )
        reply = output['choices'][0]['text']
        return jsonify({"reply": reply})
    except Exception as e:
        return jsonify({"error": str(e)}), 500


if __name__ == '__main__':
    # Bind to all interfaces on port 7860 (Hugging Face Spaces default).
    app.run(host='0.0.0.0', port=7860)