Gemma_Model / app.py
emosenseproject's picture
Upload 2 files
ed1cf13 verified
Raw
History Blame Contribute Delete
1.39 kB
import gradio as gr
from llama_cpp import Llama
# Model Configurations
model_path = "gemma-7b-it.Q2_K.gguf"
model_kwargs = {
"n_ctx": 4096,
"n_threads": 4,
"n_gpu_layers": 0,
}
# Instantiate model
llm = Llama(model_path=model_path, **model_kwargs)
# Generation kwargs
generation_kwargs = {
"max_tokens": 200,
"stop": ["<|endoftext|>", "</s>"],
"echo": False,
"top_k": 1
}
# System prompt
system_prompt = "You are an AI assistant specialized in answering common Python programming questions. Provide clear, concise, and accurate responses."
def generate_response(query):
prompt = f"{system_prompt}\nQuestion: **{query}**\nAnswer:"
res = llm(prompt, **generation_kwargs)
response = res["choices"][0]["text"].strip()
return f"**Question:** '''{query}'''\n\n**Answer:** '''{response}'''"
# Gradio interface
title = "Python Question Answering"
description = "Ask common Python questions and get answers from the LLM model."
interface = gr.Interface(
fn=generate_response,
inputs=gr.Textbox(label="Enter your Python question"),
outputs=gr.Textbox(label="Answer"),
title=title,
description=description,
examples=[
"How do I iterate over a list in Python?",
"What is the difference between a list and a tuple?",
"How do I read a file in Python?",
],
)
interface.launch(share=True)