"""Gradio web app for text generation with a quantized (GGUF) GPT-2 model.

Downloads the GGUF weights from the Hugging Face Hub on startup, loads them
with llama-cpp-python, and exposes a simple Blocks UI with sliders for
max length, temperature, and top-p sampling.
"""

import os

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Model configuration
MODEL_REPO = "druvx13/gpt2-Q8_0-GGUF"
MODEL_FILE = "gpt2-q8_0.gguf"
CACHE_DIR = "./model_cache"
MAX_TOKENS = 200


def load_model():
    """Download and load the GGUF model, returning a ready `Llama` instance.

    The file is cached under CACHE_DIR; `force_download=False` means a
    previously downloaded copy is reused across restarts.
    """
    os.makedirs(CACHE_DIR, exist_ok=True)
    model_path = hf_hub_download(
        repo_id=MODEL_REPO,
        filename=MODEL_FILE,
        cache_dir=CACHE_DIR,
        force_download=False,
    )
    return Llama(
        model_path=model_path,
        n_ctx=1024,  # context window in tokens
        n_threads=4,
        verbose=False,
    )


# Load model once at startup so every request reuses the same instance.
llm = load_model()


def generate_text(prompt, max_tokens=MAX_TOKENS, temp=0.7, top_p=0.95):
    """Generate a completion for `prompt` with anti-repetition settings.

    Parameters
    ----------
    prompt : str
        User prompt; must be non-blank and contain at least 3 words.
    max_tokens : int or float
        Maximum number of tokens to generate (slider value; coerced to int).
    temp : float
        Sampling temperature.
    top_p : float
        Nucleus-sampling threshold.

    Returns
    -------
    str
        The generated text, or a "⚠️ ..." message on invalid input or error.
    """
    if not prompt.strip():
        return "⚠️ Please enter a valid prompt."
    if len(prompt.split()) < 3:  # Minimum word count for usable results
        return "⚠️ Please enter at least 3 words for better results."
    try:
        output = llm(
            prompt=prompt,
            # Gradio sliders may deliver floats; llama-cpp expects an int.
            max_tokens=int(max_tokens),
            temperature=temp,
            top_p=top_p,
            echo=False,  # Only use supported parameters
            repeat_penalty=1.5,  # Increased from 1.2 to curb repetition
        )
        return output["choices"][0]["text"].strip()
    except Exception as e:
        # Surface the failure in the UI rather than crashing the app.
        return f"⚠️ Error generating text: {str(e)}"


# UI Components
with gr.Blocks(theme="soft") as demo:
    gr.Markdown("""
    # 🧠 GPT2 Text Generator (GGUF Version)
    Enter a prompt and adjust parameters to generate AI text using the quantized GPT2 model.
    """)

    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(
                label="Input Prompt",
                placeholder="Enter your prompt here...\n(at least 3 words)",
                lines=5,
            )
            max_tokens = gr.Slider(
                minimum=50,
                maximum=500,
                value=200,
                step=50,
                label="Max Output Length",
            )
            temp = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.85,
                step=0.1,
                label="Creativity (Temperature)",
            )
            top_p = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.9,
                step=0.05,
                label="Top-p Sampling",
            )
        with gr.Column():
            output = gr.Textbox(label="Generated Text", lines=10)
            generate_btn = gr.Button("🚀 Generate", variant="primary")

    generate_btn.click(
        fn=generate_text,
        inputs=[prompt, max_tokens, temp, top_p],
        outputs=output,
    )

demo.launch()