import gradio as gr from llama_cpp import Llama # Model Configurations model_path = "gemma-7b-it.Q2_K.gguf" model_kwargs = { "n_ctx": 4096, "n_threads": 4, "n_gpu_layers": 0, } # Instantiate model llm = Llama(model_path=model_path, **model_kwargs) # Generation kwargs generation_kwargs = { "max_tokens": 200, "stop": ["<|endoftext|>", ""], "echo": False, "top_k": 1 } # System prompt system_prompt = "You are an AI assistant specialized in answering common Python programming questions. Provide clear, concise, and accurate responses." def generate_response(query): prompt = f"{system_prompt}\nQuestion: **{query}**\nAnswer:" res = llm(prompt, **generation_kwargs) response = res["choices"][0]["text"].strip() return f"**Question:** '''{query}'''\n\n**Answer:** '''{response}'''" # Gradio interface title = "Python Question Answering" description = "Ask common Python questions and get answers from the LLM model." interface = gr.Interface( fn=generate_response, inputs=gr.Textbox(label="Enter your Python question"), outputs=gr.Textbox(label="Answer"), title=title, description=description, examples=[ "How do I iterate over a list in Python?", "What is the difference between a list and a tuple?", "How do I read a file in Python?", ], ) interface.launch(share=True)