Handled the issue of the prompt and instructions taking up the space meant for the actual response
Browse files
app.py
CHANGED
|
@@ -72,11 +72,18 @@ def generate_text(prompt, tone, max_length, temperature=0.7, top_p=0.9, repetiti
|
|
| 72 |
input_text = tone_prompts.get(tone, prompt)
|
| 73 |
# This picks the right instruction from the dictionary based on the tone.
|
| 74 |
inputs = tokenizer(input_text, return_tensors="pt")
|
|
|
|
| 75 |
# This turns our input text (with the tone instruction) into a format (tensors) that the model can process using the tokenizer.
|
|
|
|
|
|
|
|
|
|
| 76 |
outputs = model.generate(
|
| 77 |
inputs["input_ids"],
|
| 78 |
-
max_length=max_length + len(input_text.split()),
|
| 79 |
# This sets how long the generated text can be. We add the number of words in our input text (len(input_text.split())) to the max_length the user picked, so the model knows how many total words to create.
|
|
|
|
|
|
|
|
|
|
| 80 |
temperature=temperature,
|
| 81 |
# This controls how creative the model gets. A lower temperature (e.g., 0.7) keeps things more predictable, while a higher one makes it wilder and more random—think of it like adjusting the spice level!
|
| 82 |
top_p=top_p,
|
|
@@ -86,9 +93,14 @@ def generate_text(prompt, tone, max_length, temperature=0.7, top_p=0.9, repetiti
|
|
| 86 |
num_return_sequences=1,
|
| 87 |
# This tells the model to give us just one version of the text. If we wanted more options, we could increase this number.
|
| 88 |
do_sample=True
|
|
|
|
| 89 |
)
|
|
|
|
|
|
|
| 90 |
# This tells the model to generate text: it uses the input IDs, sets a max length, and adjusts creativity with temperature, top_p, and repetition_penalty.
|
| 91 |
-
|
|
|
|
|
|
|
| 92 |
# This turns the model's output back into readable form, skipping any extra tokens we don’t need.
|
| 93 |
|
| 94 |
# Clean and Solid UI for our Project, keeping the blue theme of gemini.
|
|
|
|
| 72 |
input_text = tone_prompts.get(tone, prompt)
|
| 73 |
# This picks the right instruction from the dictionary based on the tone.
|
| 74 |
inputs = tokenizer(input_text, return_tensors="pt")
|
| 75 |
+
input_ids = inputs["input_ids"]
|
| 76 |
# This turns our input text (with the tone instruction) into a format (tensors) that the model can process using the tokenizer.
|
| 77 |
+
input_token_length = input_ids.shape[1] # Get the number of tokens in the input
|
| 78 |
+
# Store the length of the input
|
| 79 |
+
|
| 80 |
outputs = model.generate(
|
| 81 |
inputs["input_ids"],
|
| 82 |
+
# max_length=max_length + len(input_text.split()),
|
| 83 |
# This sets how long the generated text can be. We add the number of words in our input text (len(input_text.split())) to the max_length the user picked, so the model knows how many total words to create.
|
| 84 |
+
# CHANGE: Use max_new_tokens for clarity instead of calculating total length
|
| 85 |
+
max_new_tokens=max_length
|
| 86 |
+
# Generate THIS many NEW tokens
|
| 87 |
temperature=temperature,
|
| 88 |
# This controls how creative the model gets. A lower temperature (e.g., 0.7) keeps things more predictable, while a higher one makes it wilder and more random—think of it like adjusting the spice level!
|
| 89 |
top_p=top_p,
|
|
|
|
| 93 |
num_return_sequences=1,
|
| 94 |
# This tells the model to give us just one version of the text. If we wanted more options, we could increase this number.
|
| 95 |
do_sample=True
|
| 96 |
+
pad_token_id=tokenizer.eos_token_id # Good practice for generation
|
| 97 |
)
|
| 98 |
+
# --- Decode ONLY the generated part ---
|
| 99 |
+
# Slice the output tensor to get only the tokens AFTER the input tokens
|
| 100 |
# This tells the model to generate text: it uses the input IDs, sets a max length, and adjusts creativity with temperature, top_p, and repetition_penalty.
|
| 101 |
+
generated_token_ids = outputs[0, input_token_length:]
|
| 102 |
+
generated_text = tokenizer.decode(generated_token_ids, skip_special_tokens=True)
|
| 103 |
+
return generated_text # Return only the newly generated text
|
| 104 |
# This turns the model's output back into readable form, skipping any extra tokens we don’t need.
|
| 105 |
|
| 106 |
# Clean and Solid UI for our Project, keeping the blue theme of gemini.
|