import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
import time
import os

# Configuration - Using a 6.7B model that fits the free tier
MODEL_REPO_ID = "TheBloke/DeepSeek-Coder-6.7B-Instruct-GGUF"
MODEL_FILENAME = "deepseek-coder-6.7b-instruct.Q4_K_M.gguf"


# Download model if not already present
def get_model_path():
    """Download model from Hugging Face Hub or use cached version."""
    try:
        model_path = hf_hub_download(
            repo_id=MODEL_REPO_ID,
            filename=MODEL_FILENAME,
            local_dir="./models",          # Save to models folder
            local_dir_use_symlinks=False,  # Avoid symlink issues
            resume_download=True           # Resume if interrupted
        )
        print(f"✅ Model downloaded to: {model_path}")
        return model_path
    except Exception as e:
        print(f"❌ Error downloading model: {e}")
        # Fall back to a local copy if the file was uploaded alongside the app
        if os.path.exists(MODEL_FILENAME):
            return MODEL_FILENAME
        raise


# Resolve the model path once at startup; the model itself is loaded lazily
MODEL_PATH = get_model_path()
llm = None


def load_model():
    """Lazy-load the model only when needed."""
    global llm
    if llm is None:
        print("⏳ Loading model... This may take 1-2 minutes on first run.")
        start_time = time.time()
        # Settings tuned for free-tier constraints
        llm = Llama(
            model_path=MODEL_PATH,
            n_ctx=2048,      # Context size (smaller = less memory)
            n_threads=2,     # Free tier provides 2 CPU threads
            n_gpu_layers=0,  # CPU only on the free tier
            verbose=True     # Helpful for debugging
        )
        load_time = time.time() - start_time
        print(f"✅ Model loaded in {load_time:.1f} seconds. Ready for inference.")
    return llm


def generate_code(prompt, max_tokens=256, temperature=0.7):
    """Main generation function."""
    try:
        model = load_model()
        # Format the prompt for DeepSeek-Coder Instruct models
        formatted_prompt = f"### Instruction:\n{prompt}\n\n### Response:\n"
        output = model(
            formatted_prompt,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=0.95,
            echo=False,
            stop=["###", "\n\n\n"]
        )
        return output["choices"][0]["text"].strip()
    except Exception as e:
        return f"❌ Error: {e}"


# Create the Gradio interface
demo = gr.Interface(
    fn=generate_code,
    inputs=[
        gr.Textbox(
            label="Code Prompt",
            placeholder="Write a Python function to reverse a string...",
            lines=4
        ),
        gr.Slider(
            minimum=32,
            maximum=512,
            value=256,
            step=32,
            label="Max Tokens"
        ),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.7,
            step=0.1,
            label="Temperature"
        )
    ],
    outputs=gr.Code(
        label="Generated Code",
        language="python"
    ),
    title="💻 DeepSeek Coder 6.7B Instruct - o87Dev",
    description="**CPU Deployment** - Running on Hugging Face Spaces free tier. ⚠️ **First request loads the model (~1-2 min)**",
    # Each example supplies a value for every input: prompt, max tokens, temperature
    examples=[
        ["Write a Python function to check if a number is prime", 256, 0.7],
        ["Create a React component for a login form", 256, 0.7],
        ["Explain the binary search algorithm in Python", 256, 0.7]
    ]
)

# Enable the request queue for better handling on the free tier, then launch
if __name__ == "__main__":
    demo.queue()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False
    )
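
# --- Usage note (a hedged sketch, not executed by this app) ---
# Once the Space is running, it can also be queried programmatically with the
# gradio_client package. The Space ID below is hypothetical; substitute the
# actual "<username>/<space-name>" of this deployment. "/predict" is the
# default endpoint name that gr.Interface exposes.
#
#   from gradio_client import Client
#
#   client = Client("o87Dev/deepseek-coder-cpu")  # hypothetical Space ID
#   result = client.predict(
#       "Write a Python function to reverse a string",  # prompt
#       256,   # max_tokens
#       0.7,   # temperature
#       api_name="/predict"
#   )
#   print(result)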