Update app.py
app.py
CHANGED
@@ -23,7 +23,6 @@ from transformers import (
 from transformers.image_utils import load_image
 from diffusers import StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
 
-
 DESCRIPTION = """
 # QwQ Edge 💬
 """
@@ -48,6 +47,29 @@ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
+# Define a helper function that returns HTML for a progress bar with a label.
+def progress_bar_html_with_label(label="Thinking..."):
+    return f"""
+    <div style="font-weight: bold; margin-bottom: 5px;">{label}</div>
+    <div id="progress-container" style="width: 100%; background-color: #eee; border-radius: 4px; overflow: hidden;">
+        <div id="progress-bar" style="width: 0%; height: 10px; background-color: limegreen; transition: width 0.1s;"></div>
+    </div>
+    <script>
+    (function() {{
+        let progressBar = document.getElementById("progress-bar");
+        let width = 0;
+        let interval = setInterval(function(){{
+            if(width < 100) {{
+                width += 1;
+                progressBar.style.width = width + "%";
+            }} else {{
+                clearInterval(interval);
+            }}
+        }}, 100);
+    }})();
+    </script>
+    """
+
 # Load text-only model and tokenizer
 model_id = "prithivMLmods/FastThink-0.5B-Tiny"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
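For a quick look at what this helper renders, here is a minimal sketch, assuming only gradio is installed; the markup is copied from the hunk above, with the script animation omitted for brevity. One caveat worth noting: the snippet hard-codes the ids "progress-container" and "progress-bar", so if several copies land on the page at once, document.getElementById only ever finds the first.

import gradio as gr

def progress_bar_html_with_label(label="Thinking..."):
    # Markup copied from the hunk above; the <script> animation is omitted here.
    return f"""
    <div style="font-weight: bold; margin-bottom: 5px;">{label}</div>
    <div id="progress-container" style="width: 100%; background-color: #eee; border-radius: 4px; overflow: hidden;">
        <div id="progress-bar" style="width: 0%; height: 10px; background-color: limegreen; transition: width 0.1s;"></div>
    </div>
    """

# Render the snippet in a bare Blocks page to eyeball the styling.
with gr.Blocks() as demo:
    gr.HTML(progress_bar_html_with_label("Thinking..."))

demo.launch()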
@@ -129,28 +151,6 @@ def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
         seed = random.randint(0, MAX_SEED)
     return seed
 
-def progress_with_text(text):
-    """
-    Returns an HTML snippet that shows an animated progress bar along with the given text.
-    """
-    return f"""
-    <div style="display: flex; align-items: center;">
-        <span style="margin-right: 10px;">Thinking...</span>
-        <div style="width: 110px; height: 5px; background-color: #ddd; overflow: hidden; position: relative; margin-left: 10px;">
-            <div style="width: 50%; height: 100%; background-color: #1565c0; animation: loading 1.5s linear infinite;"></div>
-        </div>
-    </div>
-    <div style="margin-top: 10px;">
-        {text}
-    </div>
-    <style>
-    @keyframes loading {{
-        0% {{ transform: translateX(-50%); }}
-        100% {{ transform: translateX(100%); }}
-    }}
-    </style>
-    """
-
 @spaces.GPU(duration=60, enable_queue=True)
 def generate_image_fn(
     prompt: str,
@@ -190,7 +190,6 @@ def generate_image_fn(
             batch_options["prompt"] = options["prompt"][i:i+BATCH_SIZE]
             if "negative_prompt" in batch_options and batch_options["negative_prompt"] is not None:
                 batch_options["negative_prompt"] = options["negative_prompt"][i:i+BATCH_SIZE]
-            # Wrap the pipeline call in autocast if using CUDA
             if device.type == "cuda":
                 with torch.autocast("cuda", dtype=torch.float16):
                     outputs = sd_pipe(**batch_options)
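The autocast guard above only kicks in on CUDA; on CPU the pipeline runs at full precision. A minimal standalone sketch of the same call pattern, assuming diffusers is installed; the SDXL checkpoint name is a placeholder, not necessarily the one this Space loads:

import torch
from diffusers import StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",  # placeholder checkpoint
    torch_dtype=torch.float16 if device.type == "cuda" else torch.float32,
).to(device)
pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)

prompt = "a watercolor fox in a pine forest"
if device.type == "cuda":
    # Mirror the app: wrap the pipeline call in float16 autocast on GPU.
    with torch.autocast("cuda", dtype=torch.float16):
        image = pipe(prompt, num_inference_steps=30).images[0]
else:
    image = pipe(prompt, num_inference_steps=30).images[0]
image.save("fox.png")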
@@ -219,10 +218,11 @@ def generate(
     text = input_dict["text"]
     files = input_dict.get("files", [])
 
+    # If the command is for image generation
     if text.strip().lower().startswith("@image"):
-        # Remove the "@image" tag and use the rest as prompt
         prompt = text[len("@image"):].strip()
-
+        # Show animated progress bar with "Generating Image" label
+        yield gr.HTML(progress_bar_html_with_label("Generating Image"))
         image_paths, used_seed = generate_image_fn(
             prompt=prompt,
             negative_prompt="",
@@ -236,7 +236,7 @@
             use_resolution_binning=True,
             num_images=1,
         )
-        #
+        # After generation, yield only the image (progress bar no longer shown)
         yield gr.Image(image_paths[0])
         return  # Exit early
 
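As wired above, the @image branch is a generator with a two-step shape: yield the progress bar, then yield the finished image and return. A minimal sketch of that shape, assuming gradio and Pillow are installed, with a hypothetical make_image stub standing in for generate_image_fn:

import gradio as gr
from PIL import Image

def make_image(prompt: str) -> str:
    # Hypothetical stand-in for generate_image_fn: writes a placeholder image.
    path = "placeholder.png"
    Image.new("RGB", (64, 64), "gray").save(path)
    return path

def handle_image_command(text: str):
    prompt = text[len("@image"):].strip()
    yield gr.HTML("<b>Generating Image</b>")  # stand-in progress markup
    yield gr.Image(make_image(prompt))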
@@ -247,16 +247,14 @@
     if is_tts and voice_index:
         voice = TTS_VOICES[voice_index - 1]
         text = text.replace(f"{tts_prefix}{voice_index}", "").strip()
-        # Clear previous chat history for a fresh TTS request.
         conversation = [{"role": "user", "content": text}]
     else:
         voice = None
-        # Remove any stray @tts tags and build the conversation history.
         text = text.replace(tts_prefix, "").strip()
         conversation = clean_chat_history(chat_history)
         conversation.append({"role": "user", "content": text})
 
-    #
+    # Multimodal generation (with file inputs)
     if files:
         if len(files) > 1:
             images = [load_image(image) for image in files]
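The TTS branch above receives voice_index already parsed from the message text, which this diff does not show. A minimal sketch of how such an @tts prefix can be parsed, with a hypothetical two-entry TTS_VOICES list; the app's actual voice table and parsing may differ:

TTS_VOICES = ["voice-a", "voice-b"]  # hypothetical stand-ins

def parse_tts_prefix(text: str):
    tts_prefix = "@tts"
    voice = None
    for voice_index in range(1, len(TTS_VOICES) + 1):
        tag = f"{tts_prefix}{voice_index}"
        if text.strip().lower().startswith(tag):
            voice = TTS_VOICES[voice_index - 1]
            text = text.replace(tag, "").strip()
            break
    return voice, text

voice, cleaned = parse_tts_prefix("@tts1 read this aloud")
assert voice == "voice-a" and cleaned == "read this aloud"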
@@ -279,15 +277,21 @@
         thread.start()
 
         buffer = ""
-        #
-        yield gr.HTML(
+        # Show initial progress bar with label "Thinking..."
+        yield gr.HTML(progress_bar_html_with_label("Thinking..."))
         for new_text in streamer:
             buffer += new_text
             buffer = buffer.replace("<|im_end|>", "")
-
-
+            # Update the message to show both the progress bar and current text output.
+            html = f"""
+            {progress_bar_html_with_label("Thinking...")}
+            <div style="margin-top: 10px;">{buffer}</div>
+            """
+            yield gr.HTML(html)
+        # Final output: only the generated text without the progress bar.
+        yield buffer
     else:
-        #
+        # Text-only generation
         input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
         if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
             input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
@@ -308,17 +312,21 @@
         t = Thread(target=model.generate, kwargs=generation_kwargs)
         t.start()
 
-        buffer = ""
-        # Yield the initial animated progress bar with no text yet.
-        yield gr.HTML(progress_with_text(""))
         outputs = []
+        buffer = ""
+        # Show initial progress bar with label "Thinking..."
+        yield gr.HTML(progress_bar_html_with_label("Thinking..."))
         for new_text in streamer:
             outputs.append(new_text)
             buffer = "".join(outputs)
-
-
-
-
+            html = f"""
+            {progress_bar_html_with_label("Thinking...")}
+            <div style="margin-top: 10px;">{buffer}</div>
+            """
+            yield gr.HTML(html)
+        final_response = buffer
+        # Final output: just the final text.
+        yield final_response
 
     # If TTS was requested, convert the final response to speech.
     if is_tts and voice:
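Both generation branches now follow the same generator shape: yield the progress bar first, re-yield progress plus the partial text as tokens stream in, and finish by yielding the bare string. A minimal self-contained sketch of that pattern, assuming transformers and gradio are installed; the model id matches the one loaded above, and the progress markup is reduced to a bold label:

from threading import Thread

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_id = "prithivMLmods/FastThink-0.5B-Tiny"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

def stream_reply(message, history):
    input_ids = tokenizer.apply_chat_template(
        [{"role": "user", "content": message}],
        add_generation_prompt=True,
        return_tensors="pt",
    )
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    # Run generation on a worker thread so we can consume tokens as they arrive.
    Thread(target=model.generate,
           kwargs=dict(input_ids=input_ids, streamer=streamer, max_new_tokens=256)).start()
    yield gr.HTML("<b>Thinking...</b>")  # stand-in for progress_bar_html_with_label
    buffer = ""
    for new_text in streamer:
        buffer += new_text
        # Each yield replaces the previous partial message in the chat window.
        yield gr.HTML(f"<b>Thinking...</b><div style='margin-top: 10px;'>{buffer}</div>")
    yield buffer  # final plain-text answer, without the progress bar

demo = gr.ChatInterface(stream_reply)
demo.launch()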