Update app.py
app.py
CHANGED
@@ -23,7 +23,6 @@ from transformers import (
 from transformers.image_utils import load_image
 from diffusers import StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
 
-
 DESCRIPTION = """
 # QwQ Edge 💬
 """
@@ -48,6 +47,29 @@ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
+# Define a helper function that returns HTML for a progress bar with a label.
+def progress_bar_html_with_label(label="Thinking..."):
+    return f"""
+    <div style="font-weight: bold; margin-bottom: 5px;">{label}</div>
+    <div id="progress-container" style="width: 100%; background-color: #eee; border-radius: 4px; overflow: hidden;">
+        <div id="progress-bar" style="width: 0%; height: 10px; background-color: limegreen; transition: width 0.1s;"></div>
+    </div>
+    <script>
+    (function() {{
+        let progressBar = document.getElementById("progress-bar");
+        let width = 0;
+        let interval = setInterval(function(){{
+            if(width < 100) {{
+                width += 1;
+                progressBar.style.width = width + "%";
+            }} else {{
+                clearInterval(interval);
+            }}
+        }}, 100);
+    }})();
+    </script>
+    """
+
 # Load text-only model and tokenizer
 model_id = "prithivMLmods/FastThink-0.5B-Tiny"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
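For a quick look at what this helper renders, here is a minimal sketch, assuming only gradio is installed; the markup is copied from the hunk above, with the script animation omitted for brevity. One caveat worth noting: the snippet hard-codes the ids "progress-container" and "progress-bar", so if several copies land on the page at once, document.getElementById only ever finds the first.

import gradio as gr

def progress_bar_html_with_label(label="Thinking..."):
    # Markup copied from the hunk above; the <script> animation is omitted here.
    return f"""
    <div style="font-weight: bold; margin-bottom: 5px;">{label}</div>
    <div id="progress-container" style="width: 100%; background-color: #eee; border-radius: 4px; overflow: hidden;">
        <div id="progress-bar" style="width: 0%; height: 10px; background-color: limegreen; transition: width 0.1s;"></div>
    </div>
    """

# Render the snippet in a bare Blocks page to eyeball the styling.
with gr.Blocks() as demo:
    gr.HTML(progress_bar_html_with_label("Thinking..."))

demo.launch()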
@@ -129,28 +151,6 @@ def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
         seed = random.randint(0, MAX_SEED)
     return seed
 
-def progress_with_text(text):
-    """
-    Returns an HTML snippet that shows an animated progress bar along with the given text.
-    """
-    return f"""
-    <div style="display: flex; align-items: center;">
-        <span style="margin-right: 10px;">Thinking...</span>
-        <div style="width: 110px; height: 5px; background-color: #ddd; overflow: hidden; position: relative; margin-left: 10px;">
-            <div style="width: 50%; height: 100%; background-color: #1565c0; animation: loading 1.5s linear infinite;"></div>
-        </div>
-    </div>
-    <div style="margin-top: 10px;">
-        {text}
-    </div>
-    <style>
-    @keyframes loading {{
-        0% {{ transform: translateX(-50%); }}
-        100% {{ transform: translateX(100%); }}
-    }}
-    </style>
-    """
-
 @spaces.GPU(duration=60, enable_queue=True)
 def generate_image_fn(
     prompt: str,
@@ -190,7 +190,6 @@ def generate_image_fn(
             batch_options["prompt"] = options["prompt"][i:i+BATCH_SIZE]
             if "negative_prompt" in batch_options and batch_options["negative_prompt"] is not None:
                 batch_options["negative_prompt"] = options["negative_prompt"][i:i+BATCH_SIZE]
-            # Wrap the pipeline call in autocast if using CUDA
             if device.type == "cuda":
                 with torch.autocast("cuda", dtype=torch.float16):
                     outputs = sd_pipe(**batch_options)
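The autocast guard above only kicks in on CUDA; on CPU the pipeline runs at full precision. A minimal standalone sketch of the same call pattern, assuming diffusers is installed; the SDXL checkpoint name is a placeholder, not necessarily the one this Space loads:

import torch
from diffusers import StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",  # placeholder checkpoint
    torch_dtype=torch.float16 if device.type == "cuda" else torch.float32,
).to(device)
pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)

prompt = "a watercolor fox in a pine forest"
if device.type == "cuda":
    # Mirror the app: wrap the pipeline call in float16 autocast on GPU.
    with torch.autocast("cuda", dtype=torch.float16):
        image = pipe(prompt, num_inference_steps=30).images[0]
else:
    image = pipe(prompt, num_inference_steps=30).images[0]
image.save("fox.png")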
@@ -219,10 +218,11 @@ def generate(
     text = input_dict["text"]
     files = input_dict.get("files", [])
 
+    # If the command is for image generation
     if text.strip().lower().startswith("@image"):
-        # Remove the "@image" tag and use the rest as prompt
         prompt = text[len("@image"):].strip()
-
+        # Show animated progress bar with "Generating Image" label
+        yield gr.HTML(progress_bar_html_with_label("Generating Image"))
         image_paths, used_seed = generate_image_fn(
             prompt=prompt,
             negative_prompt="",
@@ -236,7 +236,7 @@
             use_resolution_binning=True,
             num_images=1,
         )
-        #
+        # After generation, yield only the image (progress bar no longer shown)
         yield gr.Image(image_paths[0])
         return  # Exit early
 
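As wired above, the @image branch is a generator with a two-step shape: yield the progress bar, then yield the finished image and return. A minimal sketch of that shape, assuming gradio and Pillow are installed, with a hypothetical make_image stub standing in for generate_image_fn:

import gradio as gr
from PIL import Image

def make_image(prompt: str) -> str:
    # Hypothetical stand-in for generate_image_fn: writes a placeholder image.
    path = "placeholder.png"
    Image.new("RGB", (64, 64), "gray").save(path)
    return path

def handle_image_command(text: str):
    prompt = text[len("@image"):].strip()
    yield gr.HTML("<b>Generating Image</b>")  # stand-in progress markup
    yield gr.Image(make_image(prompt))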
@@ -247,16 +247,14 @@
     if is_tts and voice_index:
         voice = TTS_VOICES[voice_index - 1]
         text = text.replace(f"{tts_prefix}{voice_index}", "").strip()
-        # Clear previous chat history for a fresh TTS request.
         conversation = [{"role": "user", "content": text}]
     else:
         voice = None
-        # Remove any stray @tts tags and build the conversation history.
         text = text.replace(tts_prefix, "").strip()
         conversation = clean_chat_history(chat_history)
         conversation.append({"role": "user", "content": text})
 
-    #
+    # Multimodal generation (with file inputs)
     if files:
         if len(files) > 1:
             images = [load_image(image) for image in files]
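The TTS branch above receives voice_index already parsed from the message text, which this diff does not show. A minimal sketch of how such an @tts prefix can be parsed, with a hypothetical two-entry TTS_VOICES list; the app's actual voice table and parsing may differ:

TTS_VOICES = ["voice-a", "voice-b"]  # hypothetical stand-ins

def parse_tts_prefix(text: str):
    tts_prefix = "@tts"
    voice = None
    for voice_index in range(1, len(TTS_VOICES) + 1):
        tag = f"{tts_prefix}{voice_index}"
        if text.strip().lower().startswith(tag):
            voice = TTS_VOICES[voice_index - 1]
            text = text.replace(tag, "").strip()
            break
    return voice, text

voice, cleaned = parse_tts_prefix("@tts1 read this aloud")
assert voice == "voice-a" and cleaned == "read this aloud"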
@@ -279,15 +277,21 @@
         thread.start()
 
         buffer = ""
-        #
-        yield gr.HTML(
+        # Show initial progress bar with label "Thinking..."
+        yield gr.HTML(progress_bar_html_with_label("Thinking..."))
         for new_text in streamer:
             buffer += new_text
             buffer = buffer.replace("<|im_end|>", "")
-
-
+            # Update the message to show both the progress bar and current text output.
+            html = f"""
+            {progress_bar_html_with_label("Thinking...")}
+            <div style="margin-top: 10px;">{buffer}</div>
+            """
+            yield gr.HTML(html)
+        # Final output: only the generated text without the progress bar.
+        yield buffer
     else:
-        #
+        # Text-only generation
         input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
         if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
             input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
@@ -308,17 +312,21 @@
         t = Thread(target=model.generate, kwargs=generation_kwargs)
         t.start()
 
-        buffer = ""
-        # Yield the initial animated progress bar with no text yet.
-        yield gr.HTML(progress_with_text(""))
         outputs = []
+        buffer = ""
+        # Show initial progress bar with label "Thinking..."
+        yield gr.HTML(progress_bar_html_with_label("Thinking..."))
         for new_text in streamer:
             outputs.append(new_text)
             buffer = "".join(outputs)
-
-
-
-
+            html = f"""
+            {progress_bar_html_with_label("Thinking...")}
+            <div style="margin-top: 10px;">{buffer}</div>
+            """
+            yield gr.HTML(html)
+        final_response = buffer
+        # Final output: just the final text.
+        yield final_response
 
     # If TTS was requested, convert the final response to speech.
     if is_tts and voice:
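Both generation branches now follow the same generator shape: yield the progress bar first, re-yield progress plus the partial text as tokens stream in, and finish by yielding the bare string. A minimal self-contained sketch of that pattern, assuming transformers and gradio are installed; the model id matches the one loaded above, and the progress markup is reduced to a bold label:

from threading import Thread

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_id = "prithivMLmods/FastThink-0.5B-Tiny"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

def stream_reply(message, history):
    input_ids = tokenizer.apply_chat_template(
        [{"role": "user", "content": message}],
        add_generation_prompt=True,
        return_tensors="pt",
    )
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    # Run generation on a worker thread so we can consume tokens as they arrive.
    Thread(target=model.generate,
           kwargs=dict(input_ids=input_ids, streamer=streamer, max_new_tokens=256)).start()
    yield gr.HTML("<b>Thinking...</b>")  # stand-in for progress_bar_html_with_label
    buffer = ""
    for new_text in streamer:
        buffer += new_text
        # Each yield replaces the previous partial message in the chat window.
        yield gr.HTML(f"<b>Thinking...</b><div style='margin-top: 10px;'>{buffer}</div>")
    yield buffer  # final plain-text answer, without the progress bar

demo = gr.ChatInterface(stream_reply)
demo.launch()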