File size: 2,932 Bytes
f0d2cc4 5966b70 0057689 f0d2cc4 0672ed5 5966b70 f0d2cc4 5966b70 0057689 5966b70 0057689 5966b70 0057689 5966b70 f0d2cc4 0057689 5966b70 f0d2cc4 0057689 5966b70 0057689 f0d2cc4 0057689 f0d2cc4 0057689 f0d2cc4 0057689 f0d2cc4 5966b70 0057689 5966b70 0057689 5966b70 0057689 5966b70 0057689 5966b70 f0d2cc4 5966b70 f0d2cc4 0057689 5966b70 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 |
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import os
# Model configuration
MODEL_REPO = "druvx13/Qwen3-0.6B-Q5_0-GGUF"  # Hugging Face repo hosting the quantized weights
MODEL_FILE = "qwen3-0.6b-q5_0.gguf"  # GGUF file (Q5_0 quantization) inside that repo
CACHE_DIR = "./model_cache"  # local cache directory passed to hf_hub_download
MAX_TOKENS = 200  # default generation length (matches the UI slider's initial value)
# Initialize model (loads once at startup)
def load_model():
    """Fetch the GGUF weights from the Hub (cached locally) and return a ready Llama.

    Returns:
        Llama: a llama-cpp-python model instance backed by the downloaded file.
    """
    os.makedirs(CACHE_DIR, exist_ok=True)

    # Resolve a concrete local path, downloading only when absent from the cache.
    local_path = hf_hub_download(
        repo_id=MODEL_REPO,
        filename=MODEL_FILE,
        cache_dir=CACHE_DIR,
        force_download=False,  # flip to True to bypass the cache
    )

    llama_kwargs = {
        "model_path": local_path,  # valid filesystem path to the .gguf file
        "n_ctx": 2048,             # context window size
        "n_threads": 4,            # CPU threads for faster inference
        "verbose": False,          # disable llama.cpp debug logs
    }
    return Llama(**llama_kwargs)
# Load model at startup
llm = load_model()  # module-level singleton shared by every generation request
# Generation function with parameters
def generate_text(prompt, max_tokens=MAX_TOKENS, temp=0.7, top_p=0.95):
    """Run the loaded GGUF model on *prompt* and return the generated text.

    Args:
        prompt: input text to continue.
        max_tokens: upper bound on generated tokens.
        temp: sampling temperature.
        top_p: nucleus-sampling threshold.

    Returns:
        str: the model's completion, or a human-readable error message so the
        Gradio UI always has something to display instead of raising.
    """
    try:
        completion = llm(
            prompt=prompt,
            max_tokens=max_tokens,
            temperature=temp,
            top_p=top_p,
            echo=False,  # keep the prompt out of the returned text
        )
        return completion["choices"][0]["text"]
    except Exception as e:
        return f"Error generating text: {str(e)}"
# UI Components
with gr.Blocks(theme="soft") as demo:
    # Header — the loaded model is Qwen3-0.6B (MODEL_REPO); the old text
    # incorrectly advertised GPT2.
    gr.Markdown("""
    # 🧠 Qwen3 Text Generator (GGUF Version)
    Enter a prompt and adjust parameters to generate AI text using the quantized Qwen3-0.6B model.
    """)

    with gr.Row():
        with gr.Column():
            # Input components
            prompt = gr.Textbox(
                label="Input Prompt",
                placeholder="Enter your prompt here...",
                lines=5
            )
            max_tokens = gr.Slider(
                minimum=50,
                maximum=500,
                value=MAX_TOKENS,  # keep the default in sync with the config constant
                step=50,
                label="Max Output Length"
            )
            temp = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.7,
                step=0.1,
                label="Creativity (Temperature)"
            )
            top_p = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.95,
                step=0.05,
                label="Top-p Sampling"
            )
        with gr.Column():
            # Output and button
            output = gr.Textbox(label="Generated Text", lines=10)
            generate_btn = gr.Button("🚀 Generate", variant="primary")

    # Wire the button to the generation function.
    generate_btn.click(
        fn=generate_text,
        inputs=[prompt, max_tokens, temp, top_p],
        outputs=output
    )
# Launch app — stray trailing "|" removed (it was a syntax error); guarded so
# importing this module for testing does not start the server.
if __name__ == "__main__":
    demo.launch()