File size: 2,921 Bytes
f0d2cc4
5966b70
0057689
f0d2cc4
 
 
0c55feb
 
5966b70
 
f0d2cc4
b0204a5
5966b70
0057689
5966b70
0057689
 
 
 
 
8181b3a
0057689
 
5966b70
8181b3a
c0840a3
8181b3a
 
5966b70
f0d2cc4
0057689
5966b70
f0d2cc4
c0840a3
5966b70
c0840a3
8181b3a
b0204a5
 
 
 
8181b3a
0057689
 
 
 
 
 
8181b3a
c0840a3
 
0057689
8181b3a
0057689
8181b3a
f0d2cc4
0057689
f0d2cc4
0057689
 
 
 
f0d2cc4
 
 
 
 
b0204a5
 
f0d2cc4
5966b70
 
 
 
 
 
 
 
 
 
8181b3a
5966b70
0057689
5966b70
 
 
 
8181b3a
5966b70
 
 
 
 
 
0057689
5966b70
 
f0d2cc4
5966b70
f0d2cc4
 
 
5966b70
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import os

# Model configuration
MODEL_REPO = "druvx13/gpt2-Q8_0-GGUF"  # Hugging Face repo that hosts the GGUF file
MODEL_FILE = "gpt2-q8_0.gguf"          # 8-bit quantized GPT-2 weights inside that repo
CACHE_DIR = "./model_cache"            # local directory hf_hub_download caches into
MAX_TOKENS = 200                       # default completion length for generate_text

# Initialize model
def load_model():
    """Fetch the quantized GPT-2 GGUF file and return a ready Llama instance.

    The download is skipped when the file is already present in CACHE_DIR.

    Returns:
        Llama: model configured with a 1024-token context on 4 CPU threads.
    """
    # Make sure the cache directory exists before asking the hub for the file.
    os.makedirs(CACHE_DIR, exist_ok=True)

    # hf_hub_download resolves to the local cached path; with
    # force_download=False an existing cached copy is reused.
    local_path = hf_hub_download(
        repo_id=MODEL_REPO,
        filename=MODEL_FILE,
        cache_dir=CACHE_DIR,
        force_download=False,
    )

    model = Llama(
        model_path=local_path,
        n_ctx=1024,       # context window in tokens
        n_threads=4,      # CPU inference threads
        verbose=False,    # keep llama.cpp logging quiet
    )
    return model

# Load model at startup
# Module-level singleton: loaded once at import time and shared by every
# generate_text call (the download can make the first startup slow).
llm = load_model()

# Generation function with anti-repetition
def generate_text(prompt, max_tokens=MAX_TOKENS, temp=0.7, top_p=0.95):
    """Run the GGUF model on *prompt* and return the completion text.

    Args:
        prompt: user-supplied input text; must contain at least 3 words.
        max_tokens: maximum number of tokens to generate.
        temp: sampling temperature.
        top_p: nucleus-sampling cutoff.

    Returns:
        The stripped completion, or a "⚠️ ..." warning/error string when the
        prompt is invalid or generation fails.
    """
    # Guard: empty or whitespace-only input.
    if not prompt.strip():
        return "⚠️ Please enter a valid prompt."

    # Guard: very short prompts give GPT-2 too little to work with.
    word_count = len(prompt.split())
    if word_count < 3:
        return "⚠️ Please enter at least 3 words for better results."

    try:
        result = llm(
            prompt=prompt,
            max_tokens=max_tokens,
            temperature=temp,
            top_p=top_p,
            echo=False,  # do not repeat the prompt in the output
            # Only use supported parameters
            repeat_penalty=1.5,  # raised penalty to curb GPT-2's tendency to loop
        )
        return result["choices"][0]["text"].strip()
    except Exception as e:
        # Surface the failure to the UI instead of crashing the app.
        return f"⚠️ Error generating text: {str(e)}"

# UI Components
# Two-column layout: generation controls on the left, output + button on the
# right. The button wires the sliders straight into generate_text.
with gr.Blocks(theme="soft") as demo:
    gr.Markdown("""
    # 🧠 GPT2 Text Generator (GGUF Version)
    Enter a prompt and adjust parameters to generate AI text using the quantized GPT2 model.
    """)

    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(
                label="Input Prompt",
                placeholder="Enter your prompt here... (at least 3 words)",
                lines=5
            )
            max_tokens = gr.Slider(
                minimum=50,
                maximum=500,
                value=MAX_TOKENS,  # keep the UI default in sync with the constant
                step=50,
                label="Max Output Length"
            )
            temp = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.85,
                step=0.1,
                label="Creativity (Temperature)"
            )
            top_p = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.9,
                step=0.05,
                label="Top-p Sampling"
            )

        with gr.Column():
            output = gr.Textbox(label="Generated Text", lines=10)
            generate_btn = gr.Button("🚀 Generate", variant="primary")

    generate_btn.click(
        fn=generate_text,
        inputs=[prompt, max_tokens, temp, top_p],
        outputs=output
    )

# Guard the server launch so importing this module (e.g. for testing) does not
# start the app; behavior is unchanged when run as a script.
if __name__ == "__main__":
    demo.launch()