core-OCR

Paused

App Files Files Community

prithivMLmods committed on Mar 6

Commit

7be0e24

verified ·

1 Parent(s): 9604e47

Update app.py

Browse files

Files changed (1) hide show

app.py +70 -62

app.py CHANGED Viewed

@@ -47,29 +47,6 @@ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-# Define a helper function that returns HTML for a progress bar with a label.
-def progress_bar_html_with_label(label="Thinking..."):
-    return f"""
-    <div style="font-weight: bold; margin-bottom: 5px;">{label}</div>
-    <div id="progress-container" style="width: 100%; background-color: #eee; border-radius: 4px; overflow: hidden;">
-      <div id="progress-bar" style="width: 0%; height: 10px; background-color: limegreen; transition: width 0.1s;"></div>
-    </div>
-    <script>
-    (function() {{
-      let progressBar = document.getElementById("progress-bar");
-      let width = 0;
-      let interval = setInterval(function(){{
-          if(width < 100) {{
-              width += 1;
-              progressBar.style.width = width + "%";
-          }} else {{
-              clearInterval(interval);
-          }}
-      }}, 100);
-    }})();
-    </script>
-    """
 # Load text-only model and tokenizer
 model_id = "prithivMLmods/FastThink-0.5B-Tiny"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
@@ -110,6 +87,22 @@ def clean_chat_history(chat_history):
             cleaned.append(msg)
     return cleaned
 # Environment variables and parameters for Stable Diffusion XL
 MODEL_ID_SD = os.getenv("MODEL_VAL_PATH")  # SDXL Model repository path via env variable
 MAX_IMAGE_SIZE = int(os.getenv("MAX_IMAGE_SIZE", "4096"))
@@ -214,29 +207,44 @@ def generate(
     Special commands:
       - "@tts1" or "@tts2": triggers text-to-speech.
       - "@image": triggers image generation using the SDXL pipeline.
     """
     text = input_dict["text"]
     files = input_dict.get("files", [])
-    # If the command is for image generation
     if text.strip().lower().startswith("@image"):
         prompt = text[len("@image"):].strip()
-        # Show animated progress bar with "Generating Image" label
-        yield gr.HTML(progress_bar_html_with_label("Generating Image"))
-        image_paths, used_seed = generate_image_fn(
-            prompt=prompt,
-            negative_prompt="",
-            use_negative_prompt=False,
-            seed=1,
-            width=1024,
-            height=1024,
-            guidance_scale=3,
-            num_inference_steps=25,
-            randomize_seed=True,
-            use_resolution_binning=True,
-            num_images=1,
-        )
-        # After generation, yield only the image (progress bar no longer shown)
         yield gr.Image(image_paths[0])
         return  # Exit early
@@ -247,14 +255,16 @@ def generate(
     if is_tts and voice_index:
         voice = TTS_VOICES[voice_index - 1]
         text = text.replace(f"{tts_prefix}{voice_index}", "").strip()
         conversation = [{"role": "user", "content": text}]
     else:
         voice = None
         text = text.replace(tts_prefix, "").strip()
         conversation = clean_chat_history(chat_history)
         conversation.append({"role": "user", "content": text})
-    # Multimodal generation (with file inputs)
     if files:
         if len(files) > 1:
             images = [load_image(image) for image in files]
@@ -277,21 +287,20 @@ def generate(
         thread.start()
         buffer = ""
-        # Show initial progress bar with label "Thinking..."
-        yield gr.HTML(progress_bar_html_with_label("Thinking..."))
         for new_text in streamer:
             buffer += new_text
             buffer = buffer.replace("<|im_end|>", "")
-            # Update the message to show both the progress bar and current text output.
-            html = f"""
-            {progress_bar_html_with_label("Thinking...")}
-            <div style="margin-top: 10px;">{buffer}</div>
-            """
-            yield gr.HTML(html)
-        # Final output: only the generated text without the progress bar.
         yield buffer
     else:
-        # Text-only generation
         input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
         if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
             input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
@@ -313,19 +322,18 @@ def generate(
         t.start()
         outputs = []
-        buffer = ""
-        # Show initial progress bar with label "Thinking..."
-        yield gr.HTML(progress_bar_html_with_label("Thinking..."))
         for new_text in streamer:
             outputs.append(new_text)
-            buffer = "".join(outputs)
-            html = f"""
-            {progress_bar_html_with_label("Thinking...")}
-            <div style="margin-top: 10px;">{buffer}</div>
-            """
-            yield gr.HTML(html)
-        final_response = buffer
-        # Final output: just the final text.
         yield final_response
         # If TTS was requested, convert the final response to speech.

 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 # Load text-only model and tokenizer
 model_id = "prithivMLmods/FastThink-0.5B-Tiny"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
             cleaned.append(msg)
     return cleaned
def render_progress_bar(label: str, progress: int, output_text: str = "") -> str:
    """Return an HTML snippet with a label, a progress bar and optional text.

    The bar is a red track containing a green fill whose width is
    ``progress`` percent of the track.

    Args:
        label: Caption rendered in bold above the bar.
        progress: Completion percentage. Values outside 0-100 are clamped so
            the generated CSS width is always valid.
        output_text: Text rendered beneath the bar, e.g. partially generated
            output. Defaults to an empty string.

    Returns:
        The HTML markup as a string.
    """
    # Imported locally so this helper stays self-contained.
    import html

    # Both strings are placed inside HTML element content. Escape them so
    # characters such as "<", ">" and "&" are displayed literally instead of
    # being parsed as markup.
    safe_label = html.escape(str(label))
    safe_output = html.escape(str(output_text))
    # Keep the fill width within the track.
    percent = max(0, min(100, progress))
    return f'''
    <div style="margin-bottom: 10px;">
      <div style="font-weight: bold; margin-bottom: 5px;">{safe_label}</div>
      <div style="width: 100%; background-color: red; border-radius: 5px; overflow: hidden; height: 10px;">
        <div style="width: {percent}%; background-color: green; height: 100%; transition: width 0.3s;"></div>
      </div>
      <div style="margin-top: 10px;">{safe_output}</div>
    </div>
    '''
 # Environment variables and parameters for Stable Diffusion XL
 MODEL_ID_SD = os.getenv("MODEL_VAL_PATH")  # SDXL Model repository path via env variable
 MAX_IMAGE_SIZE = int(os.getenv("MAX_IMAGE_SIZE", "4096"))
     Special commands:
       - "@tts1" or "@tts2": triggers text-to-speech.
       - "@image": triggers image generation using the SDXL pipeline.
+    Instead of yielding a simple "Thinking..." text, an animated progress bar is shown (via an HTML snippet)
+    that goes from red to green. When the inference is complete the progress bar is replaced by the final result.
     """
     text = input_dict["text"]
     files = input_dict.get("files", [])
+    # Image generation branch
     if text.strip().lower().startswith("@image"):
         prompt = text[len("@image"):].strip()
+        # Use a container to capture the result from the thread.
+        result_container = []
+        def run_image():
+            result_container.append(generate_image_fn(
+                prompt=prompt,
+                negative_prompt="",
+                use_negative_prompt=False,
+                seed=1,
+                width=1024,
+                height=1024,
+                guidance_scale=3,
+                num_inference_steps=25,
+                randomize_seed=True,
+                use_resolution_binning=True,
+                num_images=1,
+            ))
+        thread = Thread(target=run_image)
+        thread.start()
+        start_time = time.time()
+        # Simulate progress bar updates while image generation is running.
+        while thread.is_alive():
+            progress = min(95, int((time.time() - start_time) / 5 * 95))
+            yield render_progress_bar("Generating Image", progress)
+            time.sleep(0.5)
+        thread.join()
+        # Final update before showing the result.
+        yield render_progress_bar("Generating Image", 100)
+        image_paths, used_seed = result_container[0]
         yield gr.Image(image_paths[0])
         return  # Exit early
     if is_tts and voice_index:
         voice = TTS_VOICES[voice_index - 1]
         text = text.replace(f"{tts_prefix}{voice_index}", "").strip()
+        # Clear previous chat history for a fresh TTS request.
         conversation = [{"role": "user", "content": text}]
     else:
         voice = None
+        # Remove any stray @tts tags and build the conversation history.
         text = text.replace(tts_prefix, "").strip()
         conversation = clean_chat_history(chat_history)
         conversation.append({"role": "user", "content": text})
+    # Multimodal (image + text) branch
     if files:
         if len(files) > 1:
             images = [load_image(image) for image in files]
         thread.start()
         buffer = ""
+        start_time = time.time()
+        # Initial progress bar for multimodal inference.
+        yield render_progress_bar("Thinking...", 0)
         for new_text in streamer:
             buffer += new_text
             buffer = buffer.replace("<|im_end|>", "")
+            progress = min(95, int((time.time() - start_time) / 5 * 95))
+            yield render_progress_bar("Thinking...", progress, output_text=buffer)
+        # Final progress update (100%).
+        yield render_progress_bar("Thinking...", 100, output_text=buffer)
+        # Then yield final response (progress bar update no longer shown).
         yield buffer
     else:
+        # Text-only generation branch.
         input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
         if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
             input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
         t.start()
         outputs = []
+        start_time = time.time()
+        # Initial progress bar update.
+        yield render_progress_bar("Thinking...", 0)
         for new_text in streamer:
             outputs.append(new_text)
+            current_text = "".join(outputs)
+            progress = min(95, int((time.time() - start_time) / 5 * 95))
+            yield render_progress_bar("Thinking...", progress, output_text=current_text)
+        final_response = "".join(outputs)
+        # Final update (100% progress).
+        yield render_progress_bar("Thinking...", 100, output_text=final_response)
+        # Finally, yield the final plain response so the progress bar disappears.
         yield final_response
         # If TTS was requested, convert the final response to speech.