| import gradio as gr |
| from huggingface_hub import hf_hub_download |
| from llama_cpp import Llama |
| import time |
| import os |
|
|
| |
# File name of the quantized GGUF weights served by this app.
MODEL_NAME = "DeepSeek-Coder-V2-Lite-Instruct-Q4_K_M.gguf"

# The weights are opened from a path equal to the bare file name, i.e.
# relative to the process working directory.
# NOTE(review): no download step is visible in this file -- presumably the
# file is already present next to the script; confirm.
MODEL_PATH = MODEL_NAME

# Process-wide model handle. Stays None until load_model() builds it.
llm = None
|
|
def load_model():
    """Return the shared model instance, constructing it on the first call.

    The instance is stored in the module-level ``llm`` variable, so only the
    first call pays the loading cost; later calls return the cached object.

    Returns:
        The ``Llama`` object held in the module-level ``llm``.
    """
    global llm

    # Fast path: a previous call already built the model.
    if llm is not None:
        return llm

    print(f"⏳ Loading model {MODEL_NAME}... This may take 1-2 minutes on first run.")
    began = time.time()

    llama_options = dict(
        model_path=MODEL_PATH,
        n_ctx=2048,        # context window size, in tokens
        n_threads=2,       # CPU threads used for inference
        n_gpu_layers=0,    # offload nothing to a GPU
        verbose=False,
    )
    llm = Llama(**llama_options)

    elapsed = time.time() - began
    print(f"✅ Model loaded in {elapsed:.1f} seconds. Ready for inference.")
    return llm
|
|
def generate_code(prompt, max_tokens=256, temperature=0.7):
    """Generate a completion for ``prompt`` with the lazily loaded model.

    Args:
        prompt: Instruction text entered by the user.
        max_tokens: Upper bound on the number of generated tokens. Coerced
            to ``int`` because a UI slider may deliver the value as a float.
        temperature: Sampling temperature. Coerced to ``float``.

    Returns:
        The stripped completion text. On any failure (including a blank
        prompt) a string starting with "❌ Error:" is returned instead of
        raising, so the UI always receives a displayable value.
    """
    # Reject blank input before touching the model: the first load_model()
    # call is slow, and an empty instruction cannot yield a useful answer.
    if prompt is None or not str(prompt).strip():
        return "❌ Error: Prompt is empty. Please describe the code you want."

    try:
        model = load_model()

        # NOTE(review): this is an "### Instruction / ### Response" template;
        # confirm it matches the chat format this model was tuned on.
        formatted_prompt = f"### Instruction:\n{prompt}\n\n### Response:\n"

        output = model(
            formatted_prompt,
            # Coerce inside the try so a bad value becomes an error string.
            max_tokens=int(max_tokens),
            temperature=float(temperature),
            top_p=0.95,
            echo=False,
            # Stop at the next template header or after a run of blank lines.
            stop=["###", "\n\n\n"],
        )

        return output['choices'][0]['text'].strip()

    except Exception as e:
        # Deliberate best-effort: surface the failure as text in the UI.
        return f"❌ Error: {str(e)}"
|
|
| |
# Gradio UI: one prompt box plus two sampling sliders, wired to
# generate_code(); the result is rendered in a code viewer.
demo = gr.Interface(
    title="💻 DeepSeek Coder V2 Lite (16B) - o87Dev",
    description="**CPU Deployment** - Largest viable model on Hugging Face Spaces free tier. ⚠️ **First request loads model (~1-2 min)**",
    fn=generate_code,
    # Input order matches generate_code(prompt, max_tokens, temperature).
    inputs=[
        gr.Textbox(
            lines=4,
            label="Code Prompt",
            placeholder="Write a Python function to reverse a string...",
        ),
        gr.Slider(label="Max Tokens", minimum=32, maximum=512, step=32, value=256),
        gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, step=0.1, value=0.7),
    ],
    outputs=gr.Code(language="python", label="Generated Code"),
    # Each example is a one-element row that supplies only the prompt.
    examples=[
        [sample_prompt]
        for sample_prompt in (
            "Write a Python function to check if a number is prime",
            "Create a React component for a login form",
            "Explain binary search algorithm in Python",
        )
    ],
)
|
|
| |
if __name__ == "__main__":
    # Start the web server only when run as a script, not when imported.
    # "0.0.0.0" listens on every network interface; no public share link.
    demo.launch(server_name="0.0.0.0", server_port=7860, share=False)
|
|