File size: 2,932 Bytes
f0d2cc4
5966b70
0057689
f0d2cc4
 
 
0672ed5
 
5966b70
 
f0d2cc4
5966b70
 
0057689
5966b70
0057689
 
 
 
 
 
 
 
 
5966b70
0057689
 
 
 
5966b70
f0d2cc4
0057689
5966b70
f0d2cc4
0057689
5966b70
0057689
 
 
 
 
 
 
 
 
 
 
 
f0d2cc4
0057689
f0d2cc4
0057689
 
 
 
f0d2cc4
 
 
0057689
f0d2cc4
 
 
 
 
5966b70
 
 
 
 
 
 
 
 
 
 
 
0057689
5966b70
 
 
 
 
 
 
 
 
 
0057689
5966b70
0057689
5966b70
0057689
5966b70
f0d2cc4
5966b70
f0d2cc4
 
 
0057689
5966b70
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import os

# Model configuration
MODEL_REPO = "druvx13/Qwen3-0.6B-Q5_0-GGUF"  # Hugging Face repo hosting the quantized weights
MODEL_FILE = "qwen3-0.6b-q5_0.gguf"  # Q5_0-quantized Qwen3-0.6B checkpoint inside the repo
CACHE_DIR = "./model_cache"  # local directory used as hf_hub_download cache
MAX_TOKENS = 200  # default completion length for generate_text

# Initialize model (loads once at startup)
def load_model():
    """Fetch the GGUF checkpoint from the Hub (cached locally) and return a Llama instance."""
    os.makedirs(CACHE_DIR, exist_ok=True)

    # Resolve a local file path; the download is skipped when the cache already has the file.
    gguf_path = hf_hub_download(
        repo_id=MODEL_REPO,
        filename=MODEL_FILE,
        cache_dir=CACHE_DIR,
        force_download=False,  # flip to True to ignore the local cache
    )

    llama = Llama(
        model_path=gguf_path,   # resolved path string from the Hub cache
        n_ctx=2048,             # context window size in tokens
        n_threads=4,            # CPU threads used for inference
        verbose=False,          # silence llama.cpp debug logging
    )
    return llama

# Load model at startup
llm = load_model()  # module-level singleton shared by all generate_text calls

# Generation function with parameters
def generate_text(prompt, max_tokens=MAX_TOKENS, temp=0.7, top_p=0.95):
    """Run the loaded GGUF model on *prompt* and return the completion text.

    Sampling is controlled by *max_tokens*, *temp* (temperature) and *top_p*.
    Any inference failure is reported as a human-readable string instead of
    raising, so the UI callback never crashes.
    """
    try:
        completion = llm(
            prompt=prompt,
            max_tokens=max_tokens,
            temperature=temp,
            top_p=top_p,
            echo=False,  # return only the generated continuation, not the prompt
        )
        return completion["choices"][0]["text"]
    except Exception as e:
        # Best-effort: surface the error text in the output box.
        return f"Error generating text: {str(e)}"

# UI Components
# NOTE: the header previously said "GPT2", but MODEL_REPO points at
# Qwen3-0.6B — the labels below are corrected to match the actual model.
with gr.Blocks(theme="soft") as demo:
    gr.Markdown("""
    # 🧠 Qwen3-0.6B Text Generator (GGUF Version)
    Enter a prompt and adjust parameters to generate AI text using the quantized Qwen3-0.6B model.
    """)
    
    with gr.Row():
        with gr.Column():
            # Input components
            prompt = gr.Textbox(
                label="Input Prompt", 
                placeholder="Enter your prompt here...",
                lines=5
            )
            max_tokens = gr.Slider(
                minimum=50, 
                maximum=500, 
                value=MAX_TOKENS,  # keep the slider default in sync with the constant
                step=50,
                label="Max Output Length"
            )
            temp = gr.Slider(
                minimum=0.1, 
                maximum=1.0, 
                value=0.7, 
                step=0.1,
                label="Creativity (Temperature)"
            )
            top_p = gr.Slider(
                minimum=0.1, 
                maximum=1.0, 
                value=0.95, 
                step=0.05,
                label="Top-p Sampling"
            )
            
        with gr.Column():
            # Output and button
            output = gr.Textbox(label="Generated Text", lines=10)
            generate_btn = gr.Button("🚀 Generate", variant="primary")
    
    # Event handler: button click feeds all four inputs to generate_text
    generate_btn.click(
        fn=generate_text,
        inputs=[prompt, max_tokens, temp, top_p],
        outputs=output
    )

# Launch app
demo.launch()