Spaces:

build-small-hackathon
/

LocalDuo

Running on Zero

App Files Files Community

shayekh committed 13 days ago

Commit

f3a1cdf

verified ·

1 Parent(s): 8b261a9

Update app.py

Browse files

Files changed (1) hide show

app.py +179 -111

app.py CHANGED Viewed

@@ -151,7 +151,7 @@ def get_base64_image(image):
     return f"data:image/jpeg;base64,{img_str}"
 @spaces.GPU(duration=120)
-def extract_vocabulary(pdf_text, images, translit_lang, translit_format, target_lang, max_text_char=1500, repetition_penalty_val=1.1):
     """Use Transformers to extract vocabulary from text and images."""
     global model, processor
@@ -174,11 +174,16 @@ Return ONLY a valid JSON list of dictionaries, where each dictionary has four ke
 - 'transliteration' (the pronunciation transliterated into {translit_lang.upper()} script/characters, formatted as {translit_format}.{non_english})
 - 'translation' (the translation into {target_lang.upper()})
 - 'explanation' (a brief grammar or context note in {target_lang.upper()}).
-No markdown formatting, just raw JSON with ```json and ``` markers.
-CRITICAL: Do NOT provide any conversational filler, thinking steps, or reasoning. Answer quick without very long thinking. Output the JSON array IMMEDIATELY.
 Text:
 {pdf_text[:int(max_text_char)]}
 """
     # DEBUG: Log prompt text
@@ -209,6 +214,9 @@ Text:
     try:
         model.to("cuda")
         text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
         inputs = processor(
             text=[text],
             images=pil_images if pil_images else None,
@@ -216,131 +224,105 @@ Text:
             padding=True
         ).to("cuda")
-        global_stop_thinking[0] = False
-        global_kill_threads[0] = False
-        print(f"[STOP-THINK] Flags RESET. stop_thinking={global_stop_thinking[0]}, kill={global_kill_threads[0]}")
         from transformers import TextIteratorStreamer, StoppingCriteria, StoppingCriteriaList
         from threading import Thread
-        class StopThinkingCriteria(StoppingCriteria):
             def __call__(self, input_ids, scores, **kwargs):
-                val = global_stop_thinking[0] or global_kill_threads[0]
-                if val:
-                    print(f"[STOP-THINK] Criteria returning True! stop={global_stop_thinking[0]} kill={global_kill_threads[0]}")
-                return val
-        streamer = TextIteratorStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True)
-        generation_kwargs = dict(
-            **inputs,
-            streamer=streamer,
-            max_new_tokens=2048*16,
-            do_sample=True,
-            repetition_penalty=repetition_penalty_val,
-            stopping_criteria=StoppingCriteriaList([StopThinkingCriteria()])
-        )
-        if len(images) > 0:
-            generation_kwargs.update(dict(temperature=0.6, top_p=0.95, top_k=20, min_p=0.0))
-        else:
-            generation_kwargs.update(dict(temperature=1.0, top_p=0.95, top_k=20, min_p=0.0))
-        generation_result = []
-        def generate_and_capture(**kwargs):
             try:
-                out = model.generate(**kwargs)
-                generation_result.append(out)
             except Exception as e:
                 import traceback
-                print(f"\n[THREAD1 ERROR] model.generate crashed: {e}")
                 traceback.print_exc()
-        thread = Thread(target=generate_and_capture, kwargs=generation_kwargs)
         thread.start()
-        output_text = ""
         for new_text in streamer:
             output_text += new_text
             yield output_text, None
-        thread.join()
-        if global_kill_threads[0]:
-            yield output_text + "\n\n[Generation completely stopped by user.]", None
-            return
-        if global_stop_thinking[0]:
-            global_stop_thinking[0] = False
-            print(f"[STOP-THINK] INJECTION PATH entered. Reset flag to: {global_stop_thinking[0]}")
-            # Inject the closure of thinking and start of JSON
-            injection_text = "\n</think>\n\n```json\n[\n"
-            output_text += injection_text
-            yield output_text, None
-            # Restart generation with updated context
-            generated_ids = generation_result[0]
-            injection_ids = processor.tokenizer(injection_text, return_tensors="pt", add_special_tokens=False).input_ids.to("cuda")
-            new_input_ids = torch.cat([generated_ids, injection_ids], dim=-1)
-            # Update attention mask
-            new_mask = torch.cat([
-                inputs["attention_mask"],
-                torch.ones((1, new_input_ids.shape[1] - inputs["attention_mask"].shape[1]), dtype=inputs["attention_mask"].dtype, device="cuda")
-            ], dim=-1)
-            new_inputs = {
-                "input_ids": new_input_ids,
-                "attention_mask": new_mask
-            }
-            # Carry over only the visual features; discard stale keys like input_token_type or rope_deltas
-            keys_to_keep = ["pixel_values", "image_grid_thw", "pixel_values_videos", "video_grid_thw"]
-            for k in keys_to_keep:
-                if k in inputs:
-                    new_inputs[k] = inputs[k]
-            class KillCriteria(StoppingCriteria):
-                def __call__(self, input_ids, scores, **kwargs):
-                    return global_kill_threads[0]
-            new_streamer = TextIteratorStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True)
-            new_generation_kwargs = dict(
-                **new_inputs,
-                streamer=new_streamer,
-                max_new_tokens=2048*16,
-                do_sample=True,
-                repetition_penalty=repetition_penalty_val,
-                stopping_criteria=StoppingCriteriaList([KillCriteria()])
-            )
-            if len(images) > 0:
-                new_generation_kwargs.update(dict(temperature=0.6, top_p=0.95, top_k=20, min_p=0.0))
-            else:
-                new_generation_kwargs.update(dict(temperature=1.0, top_p=0.95, top_k=20, min_p=0.0))
-            def thread2_target(**kwargs):
-                try:
-                    model.generate(**kwargs)
-                except Exception as e:
-                    import traceback
-                    print(f"\n[THREAD2 ERROR] model.generate crashed: {e}")
-                    traceback.print_exc()
-                finally:
-                    # Always unblock the streamer to prevent Gradio UI from hanging permanently
                     try:
-                        new_streamer.end()
-                    except Exception:
-                        pass
-            thread2 = Thread(target=thread2_target, kwargs=new_generation_kwargs)
-            thread2.start()
-            for new_text in new_streamer:
-                output_text += new_text
                 yield output_text, None
-            thread2.join()
         # DEBUG: Log raw output text
         with open("log/debug_vlm_output.txt", "w", encoding="utf-8") as f:
@@ -483,7 +465,7 @@ def process_pdf(pdf_file, url_input, translit_lang, translit_format, target_lang
     is_url = bool(url_input and url_input.strip())
     if pdf_file is None and not is_url:
-        yield "<p>Please upload a PDF or enter a URL.</p>", None, None, ""
         return
     if is_url:
@@ -890,6 +872,86 @@ def get_example_pdf():
             print(f"Failed to download example PDF: {e}")
     return file_path if os.path.exists(file_path) else None
 def create_demo():
     example_pdf = get_example_pdf()
@@ -1004,7 +1066,11 @@ def create_demo():
             with gr.Column(scale=1):
                 # url_input = gr.Textbox(label="Enter a Website URL 🌐", placeholder=r"e.g. https://storykorean.com/stories?level=beginner&story=tiger", value=r"https://storykorean.com/stories?level=beginner&story=tiger")
                 # https://www.bbc.com/korean/articles/c5yz89k5dw0o
-                url_input = gr.Textbox(label="Enter a Website URL 🌐", placeholder=r"e.g. https://www.koreanstudyjunkie.com/post/korean-reading-exercise-for-all-levels-beginner-intermediate-advanced", value=r"https://www.koreanstudyjunkie.com/post/korean-reading-exercise-for-all-levels-beginner-intermediate-advanced")
                 pdf_input = gr.File(label="Or Upload Book PDF 📚", file_types=[".pdf"], value=example_pdf)
@@ -1051,6 +1117,7 @@ def create_demo():
         )
         stop_thinking_btn.click(fn=set_stop_thinking, inputs=None, outputs=stop_thinking_btn, queue=False)
         stop_btn.click(fn=set_kill_threads, inputs=None, outputs=stop_btn, queue=False).then(fn=None, inputs=None, outputs=None, cancels=[generate_event])
         # Force autoscroll using Custom JS
@@ -1262,3 +1329,4 @@ if __name__ == "__main__":
         demo.launch(server_name="0.0.0.0", server_port=7865)

     return f"data:image/jpeg;base64,{img_str}"
 @spaces.GPU(duration=120)
+def extract_vocabulary(pdf_text, images, translit_lang, translit_format, target_lang, max_text_char=1500, repetition_penalty_val=1.1, partial_assistant_text=None):
     """Use Transformers to extract vocabulary from text and images."""
     global model, processor
 - 'transliteration' (the pronunciation transliterated into {translit_lang.upper()} script/characters, formatted as {translit_format}.{non_english})
 - 'translation' (the translation into {target_lang.upper()})
 - 'explanation' (a brief grammar or context note in {target_lang.upper()}).
+Just output raw JSON with ```json and ``` markers, as the user will load in python.
+CRITICAL: Answer quick without very long thinking. Output the JSON array IMMEDIATELY.
 Text:
+<scrpated-content>
 {pdf_text[:int(max_text_char)]}
+</scrpated-content>
 """
     # DEBUG: Log prompt text
     try:
         model.to("cuda")
         text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+        if partial_assistant_text:
+            text += partial_assistant_text + "\n</think>\n\n```json\n[\n"
         inputs = processor(
             text=[text],
             images=pil_images if pil_images else None,
             padding=True
         ).to("cuda")
         from transformers import TextIteratorStreamer, StoppingCriteria, StoppingCriteriaList
         from threading import Thread
+        import queue
+        local_stop = [False]
+        class LocalKillCriteria(StoppingCriteria):
             def __call__(self, input_ids, scores, **kwargs):
+                return local_stop[0] or global_kill_threads[0]
+        def run_generation(cur_inputs, cur_streamer, cur_local_stop):
+            """Run model.generate in a thread, always calling streamer.end() on exit."""
+            kill_criteria = StoppingCriteriaList([LocalKillCriteria()])
+            gen_kwargs = dict(
+                **cur_inputs,
+                streamer=cur_streamer,
+                max_new_tokens=2048*16,
+                do_sample=True,
+                repetition_penalty=repetition_penalty_val,
+                stopping_criteria=kill_criteria
+            )
+            if len(images) > 0:
+                gen_kwargs.update(dict(temperature=0.6, top_p=0.95, top_k=20, min_p=0.0))
+            else:
+                gen_kwargs.update(dict(temperature=1.0, top_p=0.95, top_k=20, min_p=0.0))
             try:
+                model.generate(**gen_kwargs)
             except Exception as e:
                 import traceback
+                print(f"\n[THREAD ERROR] model.generate crashed: {e}")
                 traceback.print_exc()
+            finally:
+                try:
+                    cur_streamer.end()
+                except Exception:
+                    pass
+        output_text = partial_assistant_text + "\n</think>\n\n```json\n[\n" if partial_assistant_text else ""
+        streamer = TextIteratorStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True)
+        thread = Thread(target=run_generation, args=(inputs, streamer, local_stop))
         thread.start()
+        force_triggered = False
         for new_text in streamer:
             output_text += new_text
             yield output_text, None
+            # Check if user clicked "Stop thinking"
+            if global_stop_thinking[0] and not force_triggered:
+                force_triggered = True
+                print("[STOP-THINK] Flag detected inside streamer loop! Killing current generation...")
+                # 1. Kill the current generation thread
+                local_stop[0] = True
+                # Drain queue so the thread can exit
+                while not streamer.text_queue.empty():
                     try:
+                        streamer.text_queue.get_nowait()
+                    except queue.Empty:
+                        break
+                thread.join(timeout=5)
+                print("[STOP-THINK] Old thread joined. Starting forced JSON generation...")
+                # 2. Reset flags
+                global_stop_thinking[0] = False
+                local_stop[0] = False
+                # 3. Append the think-closing + JSON prefix
+                output_text += "\n</think>\n\n```json\n[\n"
                 yield output_text, None
+                # 4. Build new prompt with partial assistant text
+                text2 = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+                text2 += output_text
+                inputs2 = processor(
+                    text=[text2],
+                    images=pil_images if pil_images else None,
+                    return_tensors="pt",
+                    padding=True
+                ).to("cuda")
+                # 5. Start new generation thread
+                streamer2 = TextIteratorStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True)
+                thread2 = Thread(target=run_generation, args=(inputs2, streamer2, local_stop))
+                thread2.start()
+                for new_text2 in streamer2:
+                    output_text += new_text2
+                    yield output_text, None
+                thread2.join(timeout=10)
+                break  # Exit the outer streamer loop
+        if not force_triggered:
+            thread.join()
+        # Reset flag in case it was set but generation finished naturally
+        global_stop_thinking[0] = False
         # DEBUG: Log raw output text
         with open("log/debug_vlm_output.txt", "w", encoding="utf-8") as f:
     is_url = bool(url_input and url_input.strip())
     if pdf_file is None and not is_url:
+        yield "<p>Please upload a PDF or enter a URL.</p>", None, None, "", "", []
         return
     if is_url:
             print(f"Failed to download example PDF: {e}")
     return file_path if os.path.exists(file_path) else None
+@spaces.GPU(duration=120)
+def process_pdf_force(partial_text, pdf_file, url_input, translit_lang, translit_format, target_lang, max_text_char, repetition_penalty_val, last_source_state, last_korean_words_state):
+    """Force JSON generation using the current partial stream_box text."""
+    is_url = bool(url_input and url_input.strip())
+    current_source_hash = ""
+    if is_url:
+        current_source_hash = "url:" + url_input.strip()
+    elif pdf_file is not None:
+        import hashlib
+        with open(pdf_file.name, "rb") as f:
+            current_source_hash = "pdf:" + hashlib.md5(f.read()).hexdigest()
+    try:
+        if is_url:
+            progress(0, desc="Fetching Website...")
+            content_text, images = extract_website_content(url_input.strip())
+        else:
+            progress(0, desc="Reading PDF...")
+            content_text, images = extract_pdf_content(pdf_file.name)
+        if not content_text.strip() and not images:
+            yield "<p>No content found.</p>", current_source_hash, None, partial_text, "", []
+            return
+    except Exception as e:
+        yield f"<p>Error reading content: {e}</p>", None, None, partial_text, "", []
+        return
+    vocab_list = []
+    stream_text = partial_text
+    progress(0.2, desc="Extracting vocabulary (Forced JSON)...")
+    for stream_t, v_list in extract_vocabulary(content_text, images, translit_lang, translit_format, target_lang, max_text_char, repetition_penalty_val, partial_assistant_text=partial_text):
+        stream_text = stream_t
+        if v_list is not None:
+            vocab_list = v_list
+        yield "", current_source_hash, None, stream_text, content_text, images
+    if not vocab_list:
+        yield "<p>Failed to parse forced JSON.</p>", current_source_hash, None, stream_text, content_text, images
+        return
+    progress(0.6, desc="Generating TTS audio...")
+    for i, item in enumerate(vocab_list):
+        korean = item.get("korean", "")
+        if korean and tts is not None:
+            progress(0.6 + 0.3 * (i / len(vocab_list)), desc=f"Generating audio {i+1}/{len(vocab_list)}...")
+            try:
+                wav, dur = tts.synthesize(
+                    korean, voice_style=voice_style, lang="ko",
+                    total_steps=12,
+                    speed=0.7,
+                )
+                import numpy as np
+                import soundfile as sf
+                audio_path = f"audio_{i}.wav"
+                sf.write(audio_path, wav, 24000)
+                item["audio_uri"] = numpy_to_base64_audio(wav, tts.sample_rate)
+            except Exception as e:
+                print(f"Failed to generate audio for {korean}: {e}")
+                item["audio_uri"] = None
+    progress(1.0, desc="Rendering flashcards...")
+    from jinja2 import Environment, BaseLoader
+    import json
+    env = Environment(loader=BaseLoader())
+    template = env.from_string(html_template)
+    html_output = template.render(
+        vocab_list=vocab_list,
+        translit_lang=translit_lang,
+        target_lang=target_lang
+    )
+    safe_srcdoc = html_output.replace('"', '&quot;')
+    yield f'<iframe srcdoc="{safe_srcdoc}" style="width: 100%; height: 650px; border: none; overflow-y: auto;"></iframe>', current_source_hash, vocab_list, stream_text, content_text, images
 def create_demo():
     example_pdf = get_example_pdf()
             with gr.Column(scale=1):
                 # url_input = gr.Textbox(label="Enter a Website URL 🌐", placeholder=r"e.g. https://storykorean.com/stories?level=beginner&story=tiger", value=r"https://storykorean.com/stories?level=beginner&story=tiger")
                 # https://www.bbc.com/korean/articles/c5yz89k5dw0o
+                # https://www.bbc.com/korean/articles/cn0p7rkvxdgo
+                # https://www.koreanstudyjunkie.com/post/korean-reading-exercise-for-all-levels-beginner-intermediate-advanced
+                url_input = gr.Textbox(label="Enter a Website URL 🌐",
+                placeholder=r"e.g. # https://www.bbc.com/korean/articles/cn0p7rkvxdgo",
+                value=r"https://www.bbc.com/korean/articles/cn0p7rkvxdgo")
                 pdf_input = gr.File(label="Or Upload Book PDF 📚", file_types=[".pdf"], value=example_pdf)
         )
         stop_thinking_btn.click(fn=set_stop_thinking, inputs=None, outputs=stop_thinking_btn, queue=False)
         stop_btn.click(fn=set_kill_threads, inputs=None, outputs=stop_btn, queue=False).then(fn=None, inputs=None, outputs=None, cancels=[generate_event])
         # Force autoscroll using Custom JS
         demo.launch(server_name="0.0.0.0", server_port=7865)