Kakaarot committed
Commit 4b486c5 · verified · 1 Parent(s): 36ee1a9

I treated 1.75 tokens as one word and added an instruction to check the discussion

Files changed (1): app.py (+20, -10)
app.py CHANGED
@@ -76,30 +76,38 @@ def generate_text(prompt, tone, max_length, temperature=0.7, top_p=0.9, repetiti
     # This turns our input text (with the tone instruction) into a format (tensors) that the model can process using the tokenizer.
     input_token_length = input_ids.shape[1]  # Get the number of tokens in the input
     # Store the length of the input
-
+
+    # --- Step 1: Estimate the tokens needed (increase the buffer) ---
+    # Estimate slightly more tokens than words (e.g., a 1.5x or 2x buffer).
+    # Use a factor of 1.75 for a larger buffer to increase the chances of reaching the word count.
+    estimated_max_tokens = int(max_length * 1.75)
+    # Add a minimum token count to avoid tiny generation requests
+    estimated_max_tokens = max(estimated_max_tokens, 30)  # Ensure we generate at least some tokens
+    # --- Step 2: Generate with the higher token limit ---
     outputs = model.generate(
         inputs["input_ids"],
         # max_length=max_length + len(input_text.split()),
         # This sets how long the generated text can be. We add the number of words in our input text (len(input_text.split())) to the max_length the user picked, so the model knows how many total words to create.
         # CHANGE: Use max_new_tokens for clarity instead of calculating total length
-        max_new_tokens=max_length,
+        max_new_tokens = estimated_max_tokens,  # Use the higher estimate: roughly 1.75 tokens per requested word
         # Generate THIS many NEW tokens
-        temperature=temperature,
+        temperature = temperature,
         # This controls how creative the model gets. A lower temperature (e.g., 0.7) keeps things more predictable, while a higher one makes it wilder and more random; think of it like adjusting the spice level!
         top_p=top_p,
         # This is like a filter for word choices. It picks from the top percentage of likely words (e.g., 0.9 means 90% of the best options), making the output diverse but not too crazy.
-        repetition_penalty=repetition_penalty,
+        repetition_penalty = repetition_penalty,
         # This stops the model from repeating the same words too much. A higher value (e.g., 1.5) pushes it to try new words, like telling it to mix up its vocabulary!
-        num_return_sequences=1,
+        num_return_sequences = 1,
         # This tells the model to give us just one version of the text. If we wanted more options, we could change this.
-        do_sample=True,
-        pad_token_id=tokenizer.eos_token_id  # Good practice for generation
+        do_sample = True,
+        pad_token_id = tokenizer.eos_token_id  # Good practice for generation
     )
-    # --- Decode ONLY the generated part ---
+    # --- Step 3: Decode ONLY the generated part ---
     # Slice the output tensor to get only the tokens AFTER the input tokens
     # This tells the model to generate text: it uses the input IDs, sets a max length, and adjusts creativity with temperature, top_p, and repetition_penalty.
     generated_token_ids = outputs[0, input_token_length:]
-    generated_text = tokenizer.decode(generated_token_ids, skip_special_tokens=True)
+    generated_text = tokenizer.decode(generated_token_ids, skip_special_tokens=True).strip()
+
     return generated_text  # Return only the newly generated text
     # This turns the model's output back into readable form, skipping any extra tokens we don't need.

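For reference, below is a minimal, self-contained sketch of the pattern this hunk implements: derive an over-provisioned token budget from the requested word count, generate with max_new_tokens, then decode only the newly generated slice. The "gpt2" checkpoint, the sample prompt, and the estimate_max_tokens helper are illustrative assumptions chosen for a small runnable example; app.py itself serves a Gemma model.

# Illustrative sketch only: checkpoint, prompt, and helper name are assumptions.
from transformers import AutoModelForCausalLM, AutoTokenizer

def estimate_max_tokens(word_count: int, tokens_per_word: float = 1.75, floor: int = 30) -> int:
    # Over-provision the budget: ~1.75 tokens per word, never below a small floor.
    return max(int(word_count * tokens_per_word), floor)

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tokenizer("The cat sat on", return_tensors="pt")
input_token_length = inputs["input_ids"].shape[1]  # where the prompt ends

outputs = model.generate(
    **inputs,
    max_new_tokens=estimate_max_tokens(50),  # token budget for a ~50-word target
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
    repetition_penalty=1.5,
    pad_token_id=tokenizer.eos_token_id,
)

# Decode only the tokens that come after the prompt, as the hunk above does.
print(tokenizer.decode(outputs[0, input_token_length:], skip_special_tokens=True).strip())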
 
@@ -325,6 +333,8 @@ st.markdown("""
 # This part is generally telling how things work
 st.markdown("""
 <div class="instructions">
+<b><a href="https://huggingface.co/spaces/Kakaarot/Gemma-HuggingFace_TextCompletion_Demo/discussions/1">Please check the discussion</a></b>; I explain there why your first response will take a little more time.
+Thanks for understanding. Now enjoy! 😁 <br><br>
 Enter a prompt below to generate text using the Gemma model from DeepMind. Customize the tone and length to see different outputs!<br><br>
 <b>Example:</b> Prompt: "The cat sat on" | Tone: "Funny" | Length: 50 → "The cat sat on my homework and laughed as I cried over my grades."
 </div>
@@ -387,7 +397,7 @@ with st.form(key="input_form"):
     with col1:
         tone = st.selectbox("Tone", ["Funny", "Serious", "Poetic"], index=["Funny", "Serious", "Poetic"].index(st.session_state.get("tone", "Funny")))
     with col2:
-        max_length = st.slider("Word count", 20, 100, 50)
+        max_length = st.slider("Word count", 20, 100, 50, help="Tries to generate text close to this word count. Output might be shorter if the model finishes early, or slightly different due to word splitting. I am considering 1.75 tokens as one word.")
         # This adds a slider for users to set how many words they want in the output, ranging from 20 to 100 with a default of 50.
         # And similarly every slider here works
 
 
403