import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
import time
import os
| |
|
| | |
# GGUF quantization of DeepSeek-Coder-V2-Lite-Instruct to load with llama.cpp.
MODEL_NAME = "DeepSeek-Coder-V2-Lite-Instruct-Q4_K_M.gguf"
# NOTE(review): the bare filename is used as the load path, which assumes
# the .gguf file already sits in the working directory (e.g. baked into the
# Space at build time) — confirm, otherwise Llama() cannot open it.
MODEL_PATH = MODEL_NAME

# Module-level singleton for the loaded model; populated lazily by
# load_model() on the first request so app startup stays fast.
llm = None
| |
|
def load_model():
    """Lazily load the GGUF model, fetching it from the Hub if missing.

    The original code passed the bare filename straight to ``Llama()``,
    which fails unless the .gguf file already sits in the working
    directory; ``hf_hub_download`` was imported but never used. We now
    fall back to downloading the file when it is not present locally.

    Returns:
        Llama: the module-level singleton model instance.
    """
    global llm
    if llm is None:
        print(f"⏳ Loading model {MODEL_NAME}... This may take 1-2 minutes on first run.")
        start_time = time.time()

        model_path = MODEL_PATH
        if not os.path.exists(model_path):
            # NOTE(review): repo id inferred from the model filename —
            # confirm it matches the repo this deployment actually uses.
            model_path = hf_hub_download(
                repo_id="bartowski/DeepSeek-Coder-V2-Lite-Instruct-GGUF",
                filename=MODEL_NAME,
            )

        llm = Llama(
            model_path=model_path,
            n_ctx=2048,      # modest context window to fit free-tier RAM
            n_threads=2,     # free-tier Spaces expose 2 vCPUs
            n_gpu_layers=0,  # CPU-only deployment
            verbose=False,
        )

        load_time = time.time() - start_time
        print(f"✅ Model loaded in {load_time:.1f} seconds. Ready for inference.")
    return llm
| |
|
def generate_code(prompt, max_tokens=256, temperature=0.7):
    """Run one instruction-style completion and return the generated text.

    Any failure (model load or inference) is returned as a string rather
    than raised, so the Gradio UI displays it instead of crashing.
    """
    try:
        engine = load_model()

        # Alpaca-style instruction wrapper expected by the model.
        wrapped = f"### Instruction:\n{prompt}\n\n### Response:\n"

        completion = engine(
            wrapped,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=0.95,
            echo=False,              # don't echo the prompt back
            stop=["###", "\n\n\n"],  # cut off at the next section marker
        )

        text = completion["choices"][0]["text"]
        return text.strip()

    except Exception as e:
        return f"❌ Error: {str(e)}"
| |
|
| | |
# Gradio UI: one prompt box plus two generation-parameter sliders,
# rendering the model output into a syntax-highlighted code panel.
demo = gr.Interface(
    fn=generate_code,
    inputs=[
        gr.Textbox(
            label="Code Prompt",
            placeholder="Write a Python function to reverse a string...",
            lines=4,
        ),
        gr.Slider(
            minimum=32,
            maximum=512,
            value=256,
            step=32,
            label="Max Tokens",
        ),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.7,
            step=0.1,
            label="Temperature",
        ),
    ],
    outputs=gr.Code(
        label="Generated Code",
        language="python",
    ),
    title="💻 DeepSeek Coder V2 Lite (16B) - o87Dev",
    description="**CPU Deployment** - Largest viable model on Hugging Face Spaces free tier. ⚠️ **First request loads model (~1-2 min)**",
    # BUG FIX: each example row must supply a value for EVERY input
    # component (prompt, max_tokens, temperature). The original rows had
    # a single value each, which makes gr.Interface raise a ValueError
    # at construction time because the row length != number of inputs.
    examples=[
        ["Write a Python function to check if a number is prime", 256, 0.7],
        ["Create a React component for a login form", 256, 0.7],
        ["Explain binary search algorithm in Python", 128, 0.5],
    ],
)
| |
|
| | |
if __name__ == "__main__":
    # Bind on all interfaces at the port Hugging Face Spaces expects.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
    }
    demo.launch(**launch_options)
| |
|