"""Gradio app serving DeepSeek-Coder-V2-Lite (GGUF) via llama-cpp-python on CPU.

The model is lazy-loaded on the first request to keep startup fast on the
Hugging Face Spaces free tier.
"""

import os
import threading
import time

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Configuration
MODEL_NAME = "DeepSeek-Coder-V2-Lite-Instruct-Q4_K_M.gguf"
MODEL_PATH = MODEL_NAME  # Model file lives alongside this script.

# Model handle, lazily initialized on first use (see load_model).
llm = None
# Gradio dispatches requests on worker threads; without a lock, two
# concurrent first requests could both load the multi-GB GGUF file.
_llm_lock = threading.Lock()


def load_model():
    """Lazy-load the GGUF model on first use and cache it in the global ``llm``.

    Returns:
        Llama: the loaded llama.cpp model handle (CPU-only settings).
    """
    global llm
    if llm is None:
        with _llm_lock:
            # Double-check inside the lock: another thread may have just
            # finished loading while we were waiting.
            if llm is None:
                print(f"⏳ Loading model {MODEL_NAME}... This may take 1-2 minutes on first run.")
                start_time = time.time()
                # CPU-optimized settings for the free tier.
                model = Llama(
                    model_path=MODEL_PATH,
                    n_ctx=2048,       # Context size (smaller = less memory)
                    n_threads=2,      # Use 2 CPU threads
                    n_gpu_layers=0,   # CPU only on free tier
                    verbose=False,
                )
                load_time = time.time() - start_time
                print(f"✅ Model loaded in {load_time:.1f} seconds. Ready for inference.")
                # Publish only after the load completes so other threads
                # never observe a half-initialized handle.
                llm = model
    return llm


def generate_code(prompt, max_tokens=256, temperature=0.7):
    """Generate a completion for *prompt* with the cached model.

    Args:
        prompt: User's code request, wrapped in the DeepSeek-Coder
            Instruct template before generation.
        max_tokens: Upper bound on generated tokens.
        temperature: Sampling temperature.

    Returns:
        str: The stripped model output, or a ``❌ Error: ...`` message if
        loading or generation fails (keeps the UI responsive on errors).
    """
    try:
        model = load_model()

        # Format prompt for DeepSeek-Coder Instruct models
        formatted_prompt = f"### Instruction:\n{prompt}\n\n### Response:\n"

        # Generate
        output = model(
            formatted_prompt,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=0.95,
            echo=False,
            # Stop at the next template marker or a long blank run.
            stop=["###", "\n\n\n"],
        )

        return output["choices"][0]["text"].strip()

    except Exception as e:
        # Surface the failure in the UI rather than crashing the worker.
        return f"❌ Error: {str(e)}"


# Create Gradio interface
demo = gr.Interface(
    fn=generate_code,
    inputs=[
        gr.Textbox(
            label="Code Prompt",
            placeholder="Write a Python function to reverse a string...",
            lines=4,
        ),
        gr.Slider(
            minimum=32,
            maximum=512,
            value=256,
            step=32,
            label="Max Tokens",
        ),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.7,
            step=0.1,
            label="Temperature",
        ),
    ],
    outputs=gr.Code(
        label="Generated Code",
        language="python",
    ),
    title="💻 DeepSeek Coder V2 Lite (16B) - o87Dev",
    # NOTE: the description must be a single string literal; the original
    # paste broke it across a raw newline, which is a syntax error.
    description=(
        "**CPU Deployment** - Largest viable model on Hugging Face Spaces free tier. \n"
        "⚠️ **First request loads model (~1-2 min)**"
    ),
    examples=[
        ["Write a Python function to check if a number is prime"],
        ["Create a React component for a login form"],
        ["Explain binary search algorithm in Python"],
    ],
)

# Launch with queue for better handling on free tier
if __name__ == "__main__":
    # Enable the request queue so slow CPU generations don't time out or
    # overlap — the comment above promised a queue, but it was never enabled.
    demo.queue()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,  # Set to True if you want a public link
    )