File size: 2,921 Bytes
f0d2cc4
5966b70
0057689
f0d2cc4
 
 
0c55feb
 
5966b70
 
f0d2cc4
b0204a5
5966b70
0057689
5966b70
0057689
 
 
 
 
8181b3a
0057689
 
5966b70
8181b3a
c0840a3
8181b3a
 
5966b70
f0d2cc4
0057689
5966b70
f0d2cc4
c0840a3
5966b70
c0840a3
8181b3a
b0204a5
 
 
 
8181b3a
0057689
 
 
 
 
 
8181b3a
c0840a3
 
0057689
8181b3a
0057689
8181b3a
f0d2cc4
0057689
f0d2cc4
0057689
 
 
 
f0d2cc4
 
 
 
 
b0204a5
 
f0d2cc4
5966b70
 
 
 
 
 
 
 
 
 
8181b3a
5966b70
0057689
5966b70
 
 
 
8181b3a
5966b70
 
 
 
 
 
0057689
5966b70
 
f0d2cc4
5966b70
f0d2cc4
 
 
5966b70
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import os

# Model configuration
MODEL_REPO = "druvx13/gpt2-Q8_0-GGUF"  # Hugging Face repo that hosts the GGUF file
MODEL_FILE = "gpt2-q8_0.gguf"          # 8-bit quantized GPT-2 weights inside that repo
CACHE_DIR = "./model_cache"            # local directory hf_hub_download caches into
MAX_TOKENS = 200                       # default completion length for generate_text

# Initialize model
def load_model():
    """Fetch the quantized GPT-2 GGUF file and return a ready Llama instance.

    The download is skipped when the file is already present in CACHE_DIR.

    Returns:
        Llama: model configured with a 1024-token context on 4 CPU threads.
    """
    # Make sure the cache directory exists before asking the hub for the file.
    os.makedirs(CACHE_DIR, exist_ok=True)

    # hf_hub_download resolves to the local cached path; with
    # force_download=False an existing cached copy is reused.
    local_path = hf_hub_download(
        repo_id=MODEL_REPO,
        filename=MODEL_FILE,
        cache_dir=CACHE_DIR,
        force_download=False,
    )

    model = Llama(
        model_path=local_path,
        n_ctx=1024,       # context window in tokens
        n_threads=4,      # CPU inference threads
        verbose=False,    # keep llama.cpp logging quiet
    )
    return model

# Load model at startup
# Module-level singleton: loaded once at import time and shared by every
# generate_text call (the download can make the first startup slow).
llm = load_model()

# Generation function with anti-repetition
def generate_text(prompt, max_tokens=MAX_TOKENS, temp=0.7, top_p=0.95):
    """Run the GGUF model on *prompt* and return the completion text.

    Args:
        prompt: user-supplied input text; must contain at least 3 words.
        max_tokens: maximum number of tokens to generate.
        temp: sampling temperature.
        top_p: nucleus-sampling cutoff.

    Returns:
        The stripped completion, or a "⚠️ ..." warning/error string when the
        prompt is invalid or generation fails.
    """
    # Guard: empty or whitespace-only input.
    if not prompt.strip():
        return "⚠️ Please enter a valid prompt."

    # Guard: very short prompts give GPT-2 too little to work with.
    word_count = len(prompt.split())
    if word_count < 3:
        return "⚠️ Please enter at least 3 words for better results."

    try:
        result = llm(
            prompt=prompt,
            max_tokens=max_tokens,
            temperature=temp,
            top_p=top_p,
            echo=False,  # do not repeat the prompt in the output
            # Only use supported parameters
            repeat_penalty=1.5,  # raised penalty to curb GPT-2's tendency to loop
        )
        return result["choices"][0]["text"].strip()
    except Exception as e:
        # Surface the failure to the UI instead of crashing the app.
        return f"⚠️ Error generating text: {str(e)}"

# UI Components
# Two-column layout: generation controls on the left, output + button on the
# right. The button wires the sliders straight into generate_text.
with gr.Blocks(theme="soft") as demo:
    gr.Markdown("""
    # 🧠 GPT2 Text Generator (GGUF Version)
    Enter a prompt and adjust parameters to generate AI text using the quantized GPT2 model.
    """)

    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(
                label="Input Prompt",
                placeholder="Enter your prompt here... (at least 3 words)",
                lines=5
            )
            max_tokens = gr.Slider(
                minimum=50,
                maximum=500,
                value=MAX_TOKENS,  # keep the UI default in sync with the constant
                step=50,
                label="Max Output Length"
            )
            temp = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.85,
                step=0.1,
                label="Creativity (Temperature)"
            )
            top_p = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.9,
                step=0.05,
                label="Top-p Sampling"
            )

        with gr.Column():
            output = gr.Textbox(label="Generated Text", lines=10)
            generate_btn = gr.Button("🚀 Generate", variant="primary")

    generate_btn.click(
        fn=generate_text,
        inputs=[prompt, max_tokens, temp, top_p],
        outputs=output
    )

# Guard the server launch so importing this module (e.g. for testing) does not
# start the app; behavior is unchanged when run as a script.
if __name__ == "__main__":
    demo.launch()