File size: 10,768 Bytes
fa24e24
 
 
51c00b5
fa24e24
 
51c00b5
 
fa24e24
 
51c00b5
fa24e24
 
 
 
 
 
 
 
 
51c00b5
fa24e24
 
 
 
51c00b5
fa24e24
 
 
51c00b5
 
fa24e24
 
51c00b5
 
fa24e24
 
51c00b5
fa24e24
 
51c00b5
fa24e24
 
 
 
 
ffc2dd6
fa24e24
51c00b5
fa24e24
 
51c00b5
fa24e24
 
51c00b5
fa24e24
613387b
fa24e24
 
 
 
 
 
 
 
 
ffc2dd6
fa24e24
 
51c00b5
 
 
 
fa24e24
 
 
 
 
ffc2dd6
fa24e24
 
ffc2dd6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fa24e24
 
 
ffc2dd6
 
 
 
 
 
 
fa24e24
 
 
 
 
 
 
 
 
51c00b5
 
 
 
 
 
 
 
 
 
 
 
af38572
 
 
51c00b5
 
 
af38572
 
 
51c00b5
 
af38572
 
 
 
51c00b5
 
 
fa24e24
 
af38572
 
613387b
 
 
 
fa24e24
 
 
 
af38572
 
fa24e24
 
 
 
613387b
 
89937b5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fa24e24
 
 
 
 
 
 
 
 
 
ffc2dd6
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
# Hey, it's your Saiyan, Utkarsh Shukla. I'm going to write my own comments after each line of code, so that even a beginner (the previous me) can read along and get what's going on.
# Bear with me, I've got your back, buddy.


import streamlit as st  
# The saviour web app creator, easy peasy web app creation by few lines of codes. 
# No HTML, CSS, or JS needed!


from transformers import AutoTokenizer, AutoModelForCausalLM
# transformers here is just library which gives us access to Transformer architecture based pretrained models for natural language processing and other tasks. For now, Think of Transformer Architecture, as mystery Architecture which makes models way cooler.


# AutoTokenizer helps in Text input -> Sentences -> Words -> Even subwords like ['un', 'break', 'able'] -> Integer IDs that model expects.
# And whats awesome is Tokens will be generated following the configurations and requirements of model which we will be using.

# AutoModelForCausalLM is a powerful and convenient class that serves as a high-level interface for loading pre-trained transformer models specifically designed for causal language modeling.
# The cooler part of these Auto* classes is that you don't need to know the exact class name of the model (like GPT2LMHeadModel, CTRLLMHeadModel, ReformerLMHeadModel, etc.).
# Meaning that AutoModelForCausalLM automatically determines the correct model architecture based on the pretrained_model_name_or_path you provide. It infers this from the configuration files associated with the pre-trained model.


from wordcloud import WordCloud
# This will help us see which words have a high frequency. It creates a visual representation of the words used, known as a Word Cloud.
# The higher the frequency, the higher the importance -> the word will appear larger in the Word Cloud.
# Mostly it leaves out our stop words like "it", "is", "are", etc.
# More frequency = more importance → bigger word in the cloud.

import matplotlib.pyplot as plt
# This guy helps us to plot. So wait till you see it.
# We’ll use it to show our Word Cloud in style.


import torch
# This import makes the PyTorch library available in our Python code.
# PyTorch is a powerful math engine and deep learning framework that our model runs on.
# Think of it as a toolkit that can do maths very, very efficiently, and it is now available to our code.


@st.cache_resource
# A decorator in python is a way to enhance a function or a class. As they are followed by @ symbol
# The function above whome they are specified, the decorator code is executed both before and after of function code, on function call.
# Now here @st.cache_resource decorator is used before loading AutoTokenizer and AutoModelForCasualLM from gemm-2b.
# Cache the model and tokenizer to avoid reloading on every run
# So first run will load and save resources to global cache, and as user interact and causes rerun of load_model_and_tokenizer(), instead of loading again it will directly use cached resources from memory
def load_model_and_tokenizer():
    model_name = "google/gemma-2b"  # using gemma-2b for prototype for my GSOC Proposal. Wish me luck.
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # Responsible for automatically downloading and loading the tokenizer configuration and vocabulary associated with the specified pre-trained model.
     # Downloads and loads the tokenizer config and vocab for the given model
    model = AutoModelForCausalLM.from_pretrained(model_name)
    # As we discussed, this class is designed for loading pre-trained language models for causal (next-token prediction) tasks.
    # Loads the actual model used for causal (next-word) prediction tasks
    return tokenizer, model


# Function to generate text with Gemma
def generate_text(prompt, tone, max_length, temperature=0.7, top_p=0.9, repetition_penalty=1.0):
    tokenizer, model = load_model_and_tokenizer()
    # Adjust prompt based on tone
    tone_prompts = {
        "Funny": f"Generate a funny response to: {prompt}",
        "Serious": f"Provide a serious and thoughtful response to: {prompt}",
        "Poetic": f"Write a poetic response to: {prompt}"
    }
    input_text = tone_prompts.get(tone, prompt)
    
    inputs = tokenizer(input_text, return_tensors="pt")
    outputs = model.generate(
        inputs["input_ids"],
        max_length=max_length + len(input_text.split()),
        temperature=temperature,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        num_return_sequences=1,
        do_sample=True
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Updated CSS for a modern, awesome look
# The whole page stylesheet lives in one named constant so the injection call
# below stays a one-liner.
# NOTE(review): the relative url('images/background.png') is resolved by the
# browser, not by Python — confirm the app actually serves that path; if it
# does not, only the dark gradient overlay will show.
_PAGE_CSS = """
    <style>
    /* Background image with fallback */
    .stApp {
        background: linear-gradient(rgba(0,0,0,0.6), rgba(0,0,0,0.6)), url('images/background.png');
        background-size: cover;
        background-position: center;
        color: #ffffff;  /* White text for contrast */
    }
    /* Cool gradient title with hover animation */
    .title {
        background: linear-gradient(90deg, #00d2ff, #3a7bd5);
        -webkit-background-clip: text;
        color: transparent;
        font-size: 40px;
        font-weight: bold;
        transition: transform 0.3s;
    }
    .title:hover {
        transform: scale(1.05);
    }
    /* Card-like instructions */
    .instructions {
        background: rgba(255, 255, 255, 0.1);
        padding: 15px;
        border-radius: 10px;
        box-shadow: 0 5px 20px rgba(0,0,0,0.3);
        font-size: 18px;
        color: #e0e0e0;
    }
    /* Neon glow output box */
    .output-box {
        background: rgba(30, 30, 50, 0.9);
        padding: 15px;
        border-radius: 12px;
        box-shadow: 0 0 15px #00d2ff, 0 0 30px #3a7bd5;
        font-family: 'Courier New', monospace;
        font-size: 16px;
        color: #ffffff;
        white-space: pre-wrap;
        animation: glow 1.5s infinite alternate;
    }
    @keyframes glow {
        from { box-shadow: 0 0 10px #00d2ff; }
        to { box-shadow: 0 0 20px #3a7bd5; }
    }
    /* Button hover effect */
    .stButton>button {
        background: #3a7bd5;
        color: white;
        border-radius: 8px;
        transition: all 0.3s;
    }
    .stButton>button:hover {
        background: #00d2ff;
        transform: translateY(-2px);
    }
    /* Slider styling */
    .stSlider>div>div>div {
        background: #00d2ff !important;
    }
    </style>
"""

# unsafe_allow_html lets the raw <style> tag through to the page.
st.markdown(_PAGE_CSS, unsafe_allow_html=True)

# Header row: a wide column for the gradient title and a narrow one for the GSoC logo.
# Column objects accept element calls directly, so no `with` blocks are needed.
title_col, logo_col = st.columns([3, 1])
title_col.markdown('<p class="title">Gemma Text Generator</p>', unsafe_allow_html=True)
logo_col.image("images/gsoc_logo.png", width=80, caption="GSoC 2025")


# Instructions card (styled by the "instructions" CSS class) with one worked example.
_INSTRUCTIONS_HTML = """
    <p class="instructions">
    Enter a prompt below to generate text using the Gemma model from DeepMind. Customize the tone and length to see different outputs!<br>
    <b>Example:</b> Prompt: "The cat sat on" | Tone: "Funny" | Length: 50 → "The cat sat on my homework and laughed as I cried over my grades."
    </p>
"""
st.markdown(_INSTRUCTIONS_HTML, unsafe_allow_html=True)


# Beginner-friendly explanation, tucked into a collapsible section.
_HOW_IT_WORKS_MD = """
    - This app uses **Gemma-2B**, a language model from Google DeepMind.
    - You give it a prompt, and it predicts the next words one-by-one (aka causal language modeling).
    - The **tone** you choose adds flavor to the prompt before it hits the model.
    - Parameters like **temperature** control how wild or safe the answers are.
    - The output is visualized in a **Word Cloud** so you can see which words stand out!
    """
explainer = st.expander("\U0001F9E0 How does this work? Click to peek inside.")
explainer.markdown(_HOW_IT_WORKS_MD)

# One-click examples: each button stores a ready-made prompt and tone in the
# session state and raises the trigger flag that the generation step checks.
if "trigger_example" not in st.session_state:
    st.session_state["trigger_example"] = False

# (button label, prompt, tone) — one entry per column, left to right.
_EXAMPLES = (
    ("Try Funny Cat Story", "The cat hacked my WiFi", "Funny"),
    ("Try Poetic Goodbye", "As the sun set on our final day", "Poetic"),
)
for example_col, (button_label, example_prompt, example_tone) in zip(st.columns(2), _EXAMPLES):
    if example_col.button(button_label):
        st.session_state["prompt"] = example_prompt
        st.session_state["tone"] = example_tone
        st.session_state["trigger_example"] = True




# User input section. Everything sits inside one form, so the values are only
# sent when the "Generate" button is pressed.
_TONE_CHOICES = ["Funny", "Serious", "Poetic"]

with st.form(key="input_form"):
    # Pre-fill from the session state so a one-click example shows up in the box.
    prompt = st.text_input(
        "Enter a prompt",
        value=st.session_state.get("prompt", ""),
        placeholder="e.g., 'The future of AI is'",
    )
    preselected_tone = st.session_state.get("tone", "Funny")
    tone = st.selectbox("Tone", _TONE_CHOICES, index=_TONE_CHOICES.index(preselected_tone))

    # Sampling controls, passed straight through to generate_text().
    temperature = st.slider("Temperature (Creativity)", min_value=0.2, max_value=1.5, value=0.7)
    top_p = st.slider("Top-p (Nucleus Sampling)", min_value=0.1, max_value=1.0, value=0.9)
    repetition_penalty = st.slider("Repetition Penalty", min_value=1.0, max_value=2.0, value=1.0)

    max_length = st.slider("Word count", min_value=20, max_value=100, value=50)
    submit_button = st.form_submit_button(label="Generate")

# Generate and display output.
# Runs when the form was submitted or when a one-click example raised the flag.
if submit_button or st.session_state.trigger_example:
    st.session_state.trigger_example = False  # Reset after use
    if not prompt.strip():
        # A prompt made only of spaces is as good as no prompt.
        st.error("Please enter a prompt!")
    else:
        import html  # stdlib; imported here so this block stays self-contained

        with st.spinner("Generating text..."):
            output = generate_text(prompt, tone, max_length, temperature, top_p, repetition_penalty)

            # The text comes from the model (and echoes what the user typed), so it
            # must be escaped before being placed inside raw HTML; otherwise a stray
            # "<" or a tag in the text would be interpreted as markup.
            # Line breaks become <br> so the box is emitted as a single line of HTML
            # with no blank lines in the middle of it.
            safe_output = "<br>".join(html.escape(output).splitlines())
            box_style = (
                "background: linear-gradient(145deg, #ffffff33, #1f1f1f99); "
                "color: #f5f5f5; "
                "padding: 1rem; "
                "border-radius: 12px; "
                "box-shadow: 0 4px 15px rgba(0,0,0,0.25); "
                "font-family: 'Courier New', monospace; "
                "font-size: 1rem; "
                "white-space: pre-wrap; "
                "backdrop-filter: blur(6px); "
                "border: 1px solid rgba(255,255,255,0.1); "
                "margin-top: 1rem;"
            )
            st.markdown(
                f'<div style="{box_style}">{safe_output}</div>',
                unsafe_allow_html=True,
            )

            # Generate and display word cloud
            try:
                wordcloud = WordCloud(width=400, height=200, background_color="white").generate(output)
            except ValueError:
                # WordCloud raises ValueError when the text has no usable words.
                st.info("Not enough words in the output to draw a word cloud.")
            else:
                # Use an explicit figure instead of pyplot's global state, and close
                # it afterwards so figures do not pile up across reruns.
                fig, ax = plt.subplots(figsize=(8, 4))
                ax.imshow(wordcloud, interpolation="bilinear")
                ax.axis("off")
                st.pyplot(fig)
                plt.close(fig)

# Footer: a divider, then credits on the left and the Gemma logo on the right.
st.markdown("---")
credits_col, badge_col = st.columns([3, 1])
credits_col.write("Built with ❤️ by Utkarsh Shukla for GSoC Proposal 2025 | Powered by (Gemma + Hugging Face) and Saiyan Pride")
credits_col.write("Wish me luck, 🤞")
badge_col.image("images/gemma_logo.png", width=80, caption="Gemma by DeepMind")