# Ztar / app.py — Hugging Face Space by druvx13
# GGUF-quantized GPT-2 text-generation demo (Gradio UI + llama-cpp-python backend).
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import os
# Model configuration
MODEL_REPO = "druvx13/gpt2-Q8_0-GGUF"  # Hugging Face Hub repo hosting the quantized weights
MODEL_FILE = "gpt2-q8_0.gguf"  # GGUF file (Q8_0 quantization) inside that repo
CACHE_DIR = "./model_cache"  # local cache directory passed to hf_hub_download
MAX_TOKENS = 200  # default generation length; matches the slider's initial value below
# Initialize model
def load_model():
    """Fetch the GGUF weights from the Hub (cached locally) and return a Llama instance."""
    os.makedirs(CACHE_DIR, exist_ok=True)
    local_path = hf_hub_download(
        repo_id=MODEL_REPO,
        filename=MODEL_FILE,
        cache_dir=CACHE_DIR,
        force_download=False,  # reuse a previously downloaded copy when present
    )
    # Modest context window and thread count — sized for CPU-only Space hardware.
    return Llama(model_path=local_path, n_ctx=1024, n_threads=4, verbose=False)
# Load model at startup
# Single shared Llama instance: downloaded/loaded once at import time and
# reused by every generation request.
llm = load_model()
# Generation function with anti-repetition
def generate_text(prompt, max_tokens=MAX_TOKENS, temp=0.7, top_p=0.95):
    """Generate a completion for *prompt* with the shared GGUF model.

    Args:
        prompt: User text to complete; must contain at least 3
            whitespace-separated words.
        max_tokens: Maximum number of tokens to generate. Gradio sliders can
            deliver floats, so the value is coerced to int before the call.
        temp: Sampling temperature.
        top_p: Nucleus-sampling cutoff.

    Returns:
        The generated text, or a "⚠️ ..." message for invalid input or a
        generation failure.
    """
    if not prompt.strip():
        return "⚠️ Please enter a valid prompt."
    if len(prompt.split()) < 3:  # Minimum word count
        return "⚠️ Please enter at least 3 words for better results."
    try:
        output = llm(
            prompt=prompt,
            max_tokens=int(max_tokens),  # llama_cpp requires an int token count
            temperature=temp,
            top_p=top_p,
            echo=False,  # return only the completion, not the echoed prompt
            # Only use supported parameters
            repeat_penalty=1.5  # Increased from 1.2 to curb GPT-2's looping
        )
        return output["choices"][0]["text"].strip()
    except Exception as e:
        # UI boundary: surface the failure to the user instead of crashing the app.
        return f"⚠️ Error generating text: {str(e)}"
# UI Components
with gr.Blocks(theme="soft") as demo:
    gr.Markdown("""
# 🧠 GPT2 Text Generator (GGUF Version)
Enter a prompt and adjust parameters to generate AI text using the quantized GPT2 model.
""")
    with gr.Row():
        with gr.Column():
            # Input side: the prompt plus the three sampling knobs.
            prompt_box = gr.Textbox(
                label="Input Prompt",
                placeholder="Enter your prompt here... (at least 3 words)",
                lines=5,
            )
            length_slider = gr.Slider(
                minimum=50,
                maximum=500,
                value=200,
                step=50,
                label="Max Output Length",
            )
            temp_slider = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.85,
                step=0.1,
                label="Creativity (Temperature)",
            )
            top_p_slider = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.9,
                step=0.05,
                label="Top-p Sampling",
            )
        with gr.Column():
            # Output side: generated text plus the trigger button.
            output_box = gr.Textbox(label="Generated Text", lines=10)
            run_button = gr.Button("🚀 Generate", variant="primary")
    # Wire the button to the generator; slider order matches its signature.
    run_button.click(
        fn=generate_text,
        inputs=[prompt_box, length_slider, temp_slider, top_p_slider],
        outputs=output_box,
    )
demo.launch()