"""Gradio web app serving CodeLlama-7B-Python (GGUF quant) via llama-cpp-python.

Downloads the model on startup, loads it on CPU, and exposes a simple
code-generation UI on port 7860.
"""

import os

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

print("Starting model download...")

# Download the model file explicitly (better control over caching, and a
# clear error if the filename is wrong for the repo).
try:
    # If your repo has a different filename, change it here.
    model_path = hf_hub_download(
        repo_id="TheBloke/CodeLlama-7B-Python-GGUF",  # Using TheBloke's reliable repo
        filename="codellama-7b-python.Q4_K_M.gguf",  # 4.08GB file
        cache_dir="./models",
    )
    print(f"✓ Model downloaded to: {model_path}")
except Exception as e:
    print(f"Error downloading model: {e}")
    raise

# Load the GGUF model into memory once at startup; `llm` is shared by all
# requests (llama-cpp-python serializes calls internally).
print("Loading model into memory...")
llm = Llama(
    model_path=model_path,
    n_ctx=2048,  # Context window (tokens)
    n_threads=int(os.getenv("N_THREADS", "2")),  # CPU threads, env-configurable
    n_batch=512,  # Batch size for prompt processing
    verbose=True,
)
print("✓ Model loaded successfully!")


def generate_code(prompt, max_tokens=500, temperature=0.7):
    """Generate code from *prompt* using the globally loaded model.

    Args:
        prompt: Natural-language description of the coding task.
        max_tokens: Upper bound on generated tokens (slider-controlled).
        temperature: Sampling temperature; lower is more deterministic.

    Returns:
        The generated text, or a human-readable error message on failure
        (both are rendered in the same output textbox).
    """
    # Robustness: avoid a full (slow, CPU-bound) generation pass on an
    # empty prompt.
    if not prompt or not prompt.strip():
        return "Error generating code: prompt is empty"
    try:
        response = llm(
            prompt,
            max_tokens=int(max_tokens),
            temperature=temperature,
            # BUG FIX: the stop list previously began with "" (an empty
            # string), which matches immediately and truncates output. It
            # was almost certainly the model's "</s>" EOS token that got
            # stripped as markup; restored here.
            stop=["</s>", "###", "\n\n\n"],
            echo=False,
        )
        return response["choices"][0]["text"]
    except Exception as e:
        return f"Error generating code: {str(e)}"


# ---------------------------------------------------------------------------
# Gradio interface
# ---------------------------------------------------------------------------
with gr.Blocks(title="CodeLlama Assistant", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🦙 CodeLlama-7B Python Assistant")
    gr.Markdown("AI-powered code generation using CodeLlama-7B (4GB GGUF model)")

    with gr.Row():
        with gr.Column():
            prompt_input = gr.Textbox(
                label="Enter your coding question or task",
                placeholder="Write a Python function to...",
                lines=5,
            )
            with gr.Row():
                max_tokens = gr.Slider(
                    minimum=100,
                    maximum=1000,
                    value=500,
                    step=50,
                    label="Max Tokens",
                )
                temperature = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.7,
                    step=0.1,
                    label="Temperature",
                )
            submit_btn = gr.Button("🚀 Generate Code", variant="primary", size="lg")
            clear_btn = gr.Button("🗑️ Clear", size="sm")

        with gr.Column():
            output = gr.Textbox(
                label="Generated Code",
                lines=15,
                show_copy_button=True,
            )

    # Button actions
    submit_btn.click(
        fn=generate_code,
        inputs=[prompt_input, max_tokens, temperature],
        outputs=output,
    )
    clear_btn.click(
        fn=lambda: ("", ""),  # blank both the prompt and the output box
        inputs=None,
        outputs=[prompt_input, output],
    )

    # Example prompts
    gr.Examples(
        examples=[
            ["Write a Python function to calculate fibonacci numbers"],
            ["Create a binary search tree class with insert and search methods"],
            ["Write a function to reverse a linked list"],
            ["Implement quicksort algorithm in Python"],
            ["Create a decorator to measure function execution time"],
        ],
        inputs=prompt_input,
    )

    gr.Markdown("""
    ### 💡 Tips:
    - Be specific in your prompts for better results
    - Lower temperature (0.3-0.5) for more focused code
    - Higher temperature (0.7-0.9) for more creative solutions
    - Model works best for Python code generation
    """)

if __name__ == "__main__":
    # Bind to all interfaces so the app is reachable inside a container.
    demo.launch(server_name="0.0.0.0", server_port=7860)