import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import os

# Model configuration
MODEL_REPO = "druvx13/Qwen3-0.6B-Q5_0-GGUF"
MODEL_FILE = "qwen3-0.6b-q5_0.gguf"
CACHE_DIR = "./model_cache"
MAX_TOKENS = 200

# Initialize model (loads once at startup)
def load_model():
    """Download and load the GGUF model, returning a ready Llama instance."""
    os.makedirs(CACHE_DIR, exist_ok=True)

    # Download model if not already cached
    model_path = hf_hub_download(
        repo_id=MODEL_REPO,
        filename=MODEL_FILE,
        cache_dir=CACHE_DIR,
        force_download=False  # Set to True to bypass the cache
    )

    return Llama(
        model_path=model_path,  # Local path returned by hf_hub_download
        n_ctx=2048,             # Context window size
        n_threads=4,            # CPU threads for faster inference
        verbose=False           # Disable debug logs
    )

# Load model at startup
llm = load_model()

# Generation function with sampling parameters
def generate_text(prompt, max_tokens=MAX_TOKENS, temp=0.7, top_p=0.95):
    """Generate text from the GGUF model with user-controlled sampling."""
    try:
        output = llm(
            prompt=prompt,
            max_tokens=max_tokens,
            temperature=temp,
            top_p=top_p,
            echo=False  # Don't repeat the input prompt in the output
        )
        return output["choices"][0]["text"]
    except Exception as e:
        return f"Error generating text: {str(e)}"

# UI components
with gr.Blocks(theme="soft") as demo:
    gr.Markdown("""
    # 🧠 Qwen3-0.6B Text Generator (GGUF Version)
    Enter a prompt and adjust the parameters to generate text with the quantized Qwen3-0.6B model.
    """)

    with gr.Row():
        with gr.Column():
            # Input components
            prompt = gr.Textbox(
                label="Input Prompt",
                placeholder="Enter your prompt here...",
                lines=5
            )
            max_tokens = gr.Slider(
                minimum=50, maximum=500, value=MAX_TOKENS, step=50,
                label="Max Output Length"
            )
            temp = gr.Slider(
                minimum=0.1, maximum=1.0, value=0.7, step=0.1,
                label="Creativity (Temperature)"
            )
            top_p = gr.Slider(
                minimum=0.1, maximum=1.0, value=0.95, step=0.05,
                label="Top-p Sampling"
            )

        with gr.Column():
            # Output and button
            output = gr.Textbox(label="Generated Text", lines=10)
            generate_btn = gr.Button("🚀 Generate", variant="primary")

    # Event handler
    generate_btn.click(
        fn=generate_text,
        inputs=[prompt, max_tokens, temp, top_p],
        outputs=output
    )

# Launch app
demo.launch()
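
# --- Optional: streaming output (hedged sketch, not part of the app above) ---
# llama-cpp-python can stream completions by passing stream=True, which makes
# the call return an iterator of partial chunks instead of a single dict, and
# Gradio accepts generator functions as event handlers. Wiring the two together
# looks roughly like the sketch below (an untested variant of generate_text();
# generate_text_stream is a hypothetical name, and the function would need to
# be defined above the gr.Blocks section, not here after demo.launch()):
#
# def generate_text_stream(prompt, max_tokens=MAX_TOKENS, temp=0.7, top_p=0.95):
#     partial = ""
#     for chunk in llm(prompt=prompt, max_tokens=max_tokens,
#                      temperature=temp, top_p=top_p,
#                      echo=False, stream=True):
#         partial += chunk["choices"][0]["text"]
#         yield partial  # Gradio re-renders the output Textbox on each yield
#
# Swapping fn=generate_text for fn=generate_text_stream in generate_btn.click()
# would then show tokens as they arrive instead of waiting for the full reply.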