Update app.py
app.py
@@ -47,24 +47,6 @@ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
-# Updated function with optimized progress UI
-def progress_bar_html(message: str) -> str:
-    return f"""
-    <div style="display: flex; align-items: center; justify-content: center; margin: 10px 0;">
-        <span style="margin-right: 10px; font-weight: bold; color: #333;">{message}</span>
-        <div style="position: relative; width: 200px; height: 10px; background-color: #e0e0e0; border-radius: 5px; overflow: hidden;">
-            <div style="position: absolute; width: 100%; height: 100%; background: linear-gradient(90deg, #76c7c0, #4caf50); animation: loading 2s ease-in-out infinite;"></div>
-        </div>
-    </div>
-    <style>
-    @keyframes loading {{
-        0% {{ transform: translateX(-100%); }}
-        50% {{ transform: translateX(0%); }}
-        100% {{ transform: translateX(100%); }}
-    }}
-    </style>
-    """
-
 # Load text-only model and tokenizer
 model_id = "prithivMLmods/FastThink-0.5B-Tiny"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
@@ -80,7 +62,7 @@ TTS_VOICES = [
     "en-US-GuyNeural",  # @tts2
 ]
 
-MODEL_ID = "prithivMLmods/Qwen2-VL-OCR-2B-Instruct"
+MODEL_ID = "prithivMLmods/Qwen2-VL-OCR-2B-Instruct"
 processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
 model_m = Qwen2VLForConditionalGeneration.from_pretrained(
     MODEL_ID,
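For orientation, here is a minimal sketch of how a Qwen2-VL processor/model pair like the one loaded above is typically driven for a single image-plus-text turn. This is illustrative only; the input file, message layout, and generation parameters are assumptions, not this app's code:

```python
from PIL import Image
from transformers import AutoProcessor, Qwen2VLForConditionalGeneration

MODEL_ID = "prithivMLmods/Qwen2-VL-OCR-2B-Instruct"
processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
model = Qwen2VLForConditionalGeneration.from_pretrained(MODEL_ID)

# One user turn: an image placeholder plus a text instruction.
image = Image.open("sample.png")  # hypothetical input file
messages = [{"role": "user", "content": [
    {"type": "image"},
    {"type": "text", "text": "Read the text in this image."},
]}]
prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = processor(text=[prompt], images=[image], return_tensors="pt")
output_ids = model.generate(**inputs, max_new_tokens=128)
print(processor.batch_decode(output_ids, skip_special_tokens=True)[0])
```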
@@ -146,6 +128,26 @@ def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
         seed = random.randint(0, MAX_SEED)
     return seed
 
+def progress_bar_html(label: str) -> str:
+    """
+    Returns an HTML snippet for a thin progress bar with a label.
+    The progress bar is styled as a dark red animated bar.
+    """
+    return f'''
+    <div style="display: flex; align-items: center;">
+        <span style="margin-right: 10px; font-size: 14px;">{label}</span>
+        <div style="width: 110px; height: 5px; background-color: #f0f0f0; border-radius: 2px; overflow: hidden;">
+            <div style="width: 100%; height: 100%; background-color: darkred; animation: loading 1.5s linear infinite;"></div>
+        </div>
+    </div>
+    <style>
+    @keyframes loading {{
+        0% {{ transform: translateX(-100%); }}
+        100% {{ transform: translateX(100%); }}
+    }}
+    </style>
+    '''
+
 @spaces.GPU(duration=60, enable_queue=True)
 def generate_image_fn(
     prompt: str,
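Why yielding this HTML string works: when a Gradio chat handler is a generator, each successive yield replaces the current interim bot message, and string messages are rendered as HTML/Markdown, so the animated bar disappears as soon as real content arrives. A self-contained sketch (demo_fn and the simplified helper are hypothetical stand-ins, not the app's code):

```python
import time
import gradio as gr

def progress_bar_html(label: str) -> str:
    # Simplified stand-in for the helper added in this commit.
    return f"<div><b>{label}</b> …</div>"

def demo_fn(message, history):
    yield progress_bar_html("Thinking...")  # interim animated indicator
    time.sleep(1.0)                         # stand-in for model latency
    yield "Done."                           # replaces the indicator

gr.ChatInterface(fn=demo_fn).launch()
```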
@@ -214,11 +216,11 @@ def generate(
     text = input_dict["text"]
     files = input_dict.get("files", [])
 
-    # Handle image generation command
     if text.strip().lower().startswith("@image"):
+        # Remove the "@image" tag and use the rest as prompt
         prompt = text[len("@image"):].strip()
         # Show animated progress bar for image generation
-        yield
+        yield progress_bar_html("Generating Image")
         image_paths, used_seed = generate_image_fn(
             prompt=prompt,
             negative_prompt="",
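The @image branch above is a small prefix-command dispatcher: strip a leading tag, treat the remainder as the payload. A hypothetical helper showing the same slicing in isolation:

```python
def parse_command(text: str, tag: str = "@image"):
    """Return (tag, payload) if text starts with tag, else (None, text)."""
    stripped = text.strip()
    if stripped.lower().startswith(tag):
        return tag, stripped[len(tag):].strip()
    return None, stripped

assert parse_command("@image a red fox, photorealistic") == ("@image", "a red fox, photorealistic")
assert parse_command("hello there") == (None, "hello there")
```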
@@ -232,7 +234,7 @@
             use_resolution_binning=True,
             num_images=1,
         )
-        #
+        # Once done, yield the generated image
         yield gr.Image(image_paths[0])
         return  # Exit early
 
@@ -252,7 +254,6 @@
     conversation = clean_chat_history(chat_history)
     conversation.append({"role": "user", "content": text})
 
-    # For multimodal chat with files (e.g. image + text)
     if files:
         if len(files) > 1:
             images = [load_image(image) for image in files]
@@ -275,17 +276,13 @@
         thread.start()
 
         buffer = ""
-        # Show progress bar for
-        yield
+        # Show animated progress bar for multimodal generation
+        yield progress_bar_html("Thinking...")
         for new_text in streamer:
             buffer += new_text
             buffer = buffer.replace("<|im_end|>", "")
             time.sleep(0.01)
-
-            interim_html = f"<div>{buffer}</div><div>{progress_bar_html('Thinking...')}</div>"
-            yield gr.HTML(interim_html)
-        # Final output without the progress bar
-        yield gr.HTML(f"<div>{buffer}</div>")
+            yield buffer
     else:
         input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
         if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
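Both branches now share the standard transformers streaming idiom: model.generate runs on a worker thread with a TextIteratorStreamer, while the caller drains the streamer and re-yields a growing buffer. A self-contained sketch using gpt2 as a stand-in model (the app's actual models differ):

```python
from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

tok = AutoTokenizer.from_pretrained("gpt2")
lm = AutoModelForCausalLM.from_pretrained("gpt2")

def stream_reply(prompt: str):
    inputs = tok(prompt, return_tensors="pt")
    streamer = TextIteratorStreamer(tok, skip_prompt=True, skip_special_tokens=True)
    # Generation runs on a background thread; the streamer is the bridge.
    Thread(target=lm.generate, kwargs=dict(**inputs, streamer=streamer, max_new_tokens=64)).start()
    buffer = ""
    for new_text in streamer:
        buffer += new_text
        yield buffer  # each yield is a fuller interim message

for partial in stream_reply("The quick brown fox"):
    print(partial)
```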
@@ -304,19 +301,18 @@
             "num_beams": 1,
             "repetition_penalty": repetition_penalty,
         }
-
-
+        t = Thread(target=model.generate, kwargs=generation_kwargs)
+        t.start()
 
         outputs = []
-        # Show progress bar for
-        yield
+        # Show animated progress bar for text generation
+        yield progress_bar_html("Thinking...")
         for new_text in streamer:
             outputs.append(new_text)
-
-
+            yield "".join(outputs)
+
         final_response = "".join(outputs)
-
-        yield gr.HTML(f"<div>{final_response}</div>")
+        yield final_response
 
     # If TTS was requested, convert the final response to speech.
     if is_tts and voice:
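Note that the text-only branch ends by yielding the complete final_response once more after streaming, which the TTS step below can then consume as full text. The voice names (e.g. en-US-GuyNeural) are characteristic of edge-tts; assuming that library is what the app uses, the conversion could look like this hypothetical helper:

```python
import asyncio
import edge_tts

async def text_to_speech(text: str, voice: str, out_path: str = "reply.mp3") -> str:
    # Stream the synthesized audio to a file and return its path.
    await edge_tts.Communicate(text, voice).save(out_path)
    return out_path

# asyncio.run(text_to_speech("Hello!", "en-US-GuyNeural"))
```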
|