import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import os

print("Starting model download...")

# Download the GGUF file explicitly (gives better control over caching and error handling)
try:
    # Try to find the GGUF file in the repo
    # If your repo has a different filename, change it here
    model_path = hf_hub_download(
        repo_id="TheBloke/CodeLlama-7B-Python-GGUF",  # Using TheBloke's reliable repo
        filename="codellama-7b-python.Q4_K_M.gguf",  # 4.08GB file
        cache_dir="./models"
    )
    print(f"βœ“ Model downloaded to: {model_path}")
except Exception as e:
    print(f"Error downloading model: {e}")
    raise

# Load the GGUF model
print("Loading model into memory...")
llm = Llama(
    model_path=model_path,
    n_ctx=2048,  # Context window
    n_threads=int(os.getenv("N_THREADS", "2")),  # CPU threads
    n_batch=512,  # Batch size for prompt processing
    verbose=True
)
print("βœ“ Model loaded successfully!")

def generate_code(prompt, max_tokens=500, temperature=0.7):
    """Generate code from a prompt using the loaded model"""
    try:
        response = llm(
            prompt,
            max_tokens=int(max_tokens),  # Gradio sliders pass floats; llama-cpp expects an int
            temperature=temperature,
            stop=["</s>", "###", "\n\n\n"],  # Stop generating at these sequences
            echo=False  # Don't include the prompt in the returned text
        )
        return response['choices'][0]['text']
    except Exception as e:
        return f"Error generating code: {str(e)}"

# Create Gradio interface
with gr.Blocks(title="CodeLlama Assistant", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# πŸ¦™ CodeLlama-7B Python Assistant")
    gr.Markdown("AI-powered code generation using CodeLlama-7B (4GB GGUF model)")
    
    with gr.Row():
        with gr.Column():
            prompt_input = gr.Textbox(
                label="Enter your coding question or task",
                placeholder="Write a Python function to...",
                lines=5
            )
            with gr.Row():
                max_tokens = gr.Slider(
                    minimum=100, 
                    maximum=1000, 
                    value=500, 
                    step=50,
                    label="Max Tokens"
                )
                temperature = gr.Slider(
                    minimum=0.1, 
                    maximum=1.0, 
                    value=0.7, 
                    step=0.1,
                    label="Temperature"
                )
            submit_btn = gr.Button("🚀 Generate Code", variant="primary", size="lg")
            clear_btn = gr.Button("🗑️ Clear", size="sm")
        
        with gr.Column():
            output = gr.Textbox(
                label="Generated Code", 
                lines=15,
                show_copy_button=True
            )
    
    # Button actions
    submit_btn.click(
        fn=generate_code,
        inputs=[prompt_input, max_tokens, temperature],
        outputs=output
    )
    
    clear_btn.click(
        fn=lambda: ("", ""),
        inputs=None,
        outputs=[prompt_input, output]
    )
    
    # Example prompts
    gr.Examples(
        examples=[
            ["Write a Python function to calculate fibonacci numbers"],
            ["Create a binary search tree class with insert and search methods"],
            ["Write a function to reverse a linked list"],
            ["Implement quicksort algorithm in Python"],
            ["Create a decorator to measure function execution time"]
        ],
        inputs=prompt_input
    )
    
    gr.Markdown("""
    ### 💡 Tips:
    - Be specific in your prompts for better results
    - Lower temperature (0.3-0.5) for more focused code
    - Higher temperature (0.7-0.9) for more creative solutions
    - Model works best for Python code generation
    """)

if __name__ == "__main__":
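    # 0.0.0.0:7860 is the host/port a Hugging Face Space expects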
    demo.launch(server_name="0.0.0.0", server_port=7860)
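
# To run locally (package names match the imports above; versions unpinned):
#   pip install gradio llama-cpp-python huggingface_hub
#   python app.py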