prithivMLmods committed on
Commit
63d52ce
·
verified ·
1 Parent(s): 24aaf5e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -30
app.py CHANGED
@@ -29,20 +29,20 @@ from transformers.image_utils import load_image
29
  from gradio.themes import Soft
30
  from gradio.themes.utils import colors, fonts, sizes
31
 
32
- # Define a new "Thistle" color palette
33
  colors.thistle = colors.Color(
34
  name="thistle",
35
- c50="#FCF9FD",
36
- c100="#F5F0F8",
37
- c200="#EBE1F1",
38
- c300="#E1D1E9",
39
- c400="#D8BFD8", # Thistle Base
40
- c500="#C5A9C2",
41
- c600="#B194AC",
42
- c700="#9C7F96",
43
- c800="#876A80",
44
- c900="#72556A",
45
- c950="#5D4054",
46
  )
47
 
48
  colors.red_gray = colors.Color(
@@ -52,6 +52,7 @@ colors.red_gray = colors.Color(
52
  c800="#732d2d", c900="#5f2626", c950="#4d2020",
53
  )
54
 
 
55
  class ThistleTheme(Soft):
56
  def __init__(
57
  self,
@@ -80,10 +81,10 @@ class ThistleTheme(Soft):
80
  background_fill_primary_dark="*primary_900",
81
  body_background_fill="linear-gradient(135deg, *primary_200, *primary_100)",
82
  body_background_fill_dark="linear-gradient(135deg, *primary_900, *primary_800)",
83
- button_primary_text_color="white",
84
- button_primary_text_color_hover="black",
85
- button_primary_background_fill="linear-gradient(90deg, *secondary_400, *secondary_500)",
86
- button_primary_background_fill_hover="linear-gradient(90deg, *secondary_300, *secondary_400)",
87
  button_primary_background_fill_dark="linear-gradient(90deg, *secondary_600, *secondary_800)",
88
  button_primary_background_fill_hover_dark="linear-gradient(90deg, *secondary_500, *secondary_500)",
89
  button_secondary_text_color="black",
@@ -317,32 +318,35 @@ def generate_pdf(text: str, state: Dict[str, Any], max_new_tokens: int = 2048, t
317
  time.sleep(0.01)
318
  full_response += page_header + page_buffer + "\n\n"
319
 
 
320
  @spaces.GPU
321
- def generate_caption(image: Image.Image):
322
- """
323
- Generates a caption and attributes for a single image based on a standard system prompt.
324
- """
325
  if image is None:
326
- yield "Please upload an image to generate a caption."
327
  return
328
 
329
- system_prompt = "You are an AI assistant that rigorously follows this response protocol: For every input image, your primary task is to write a precise caption that captures the essence of the image in clear, concise, and contextually accurate language. Along with the caption, provide a structured set of attributes describing the visual elements, including details such as objects, people, actions, colors, environment, mood, and other notable characteristics. Ensure captions are precise, neutral, and descriptive, avoiding unnecessary elaboration or subjective interpretation unless explicitly required. Do not reference the rules or instructions in the output; only return the formatted caption, attributes, and class_name."
 
 
 
 
 
 
 
 
330
 
331
  messages = [{"role": "user", "content": [{"type": "image"}, {"type": "text", "text": system_prompt}]}]
332
  prompt_full = processor_q3vl.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
333
-
334
  inputs = processor_q3vl(text=[prompt_full], images=[image], return_tensors="pt", padding=True).to(device)
335
-
336
  streamer = TextIteratorStreamer(processor_q3vl, skip_prompt=True, skip_special_tokens=True)
337
- generation_kwargs = {**inputs, "streamer": streamer, "max_new_tokens": DEFAULT_MAX_NEW_TOKENS}
338
  thread = Thread(target=model_q3vl.generate, kwargs=generation_kwargs)
339
  thread.start()
340
-
341
  buffer = ""
342
  for new_text in streamer:
343
  buffer += new_text
344
  time.sleep(0.01)
345
- yield buffer
346
 
347
  # --- Gradio Interface ---
348
  image_examples = [["Describe the safety measures in the image. Conclude (Safe / Unsafe)..", "images/5.jpg"], ["Convert this page to doc [markdown] precisely.", "images/3.png"]]
@@ -380,10 +384,11 @@ with gr.Blocks(theme=thistle_theme, css=css) as demo:
380
  page_info = gr.HTML('<div style="text-align:center;">No file loaded</div>')
381
  next_page_btn = gr.Button("Next ▶")
382
 
 
383
  with gr.TabItem("Caption"):
384
- caption_image_upload = gr.Image(type="pil", label="Upload Image for Captioning", height=350)
385
  caption_submit = gr.Button("Generate Caption", variant="primary")
386
- caption_output = gr.Markdown(label="Generated Caption and Attributes")
387
 
388
  with gr.Accordion("Advanced options", open=False):
389
  max_new_tokens = gr.Slider(label="Max new tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1, value=DEFAULT_MAX_NEW_TOKENS)
@@ -405,7 +410,9 @@ with gr.Blocks(theme=thistle_theme, css=css) as demo:
405
  pdf_upload.change(fn=load_and_preview_pdf, inputs=[pdf_upload], outputs=[pdf_preview_img, pdf_state, page_info])
406
  prev_page_btn.click(fn=lambda s: navigate_pdf_page("prev", s), inputs=[pdf_state], outputs=[pdf_preview_img, pdf_state, page_info])
407
  next_page_btn.click(fn=lambda s: navigate_pdf_page("next", s), inputs=[pdf_state], outputs=[pdf_preview_img, pdf_state, page_info])
408
- caption_submit.click(fn=generate_caption, inputs=[caption_image_upload], outputs=[caption_output])
 
 
409
 
410
  if __name__ == "__main__":
411
  demo.queue(max_size=50).launch(mcp_server=True, ssr_mode=False, show_error=True)
 
29
  from gradio.themes import Soft
30
  from gradio.themes.utils import colors, fonts, sizes
31
 
32
+ # 1. Define the new "Thistle" color palette
33
  colors.thistle = colors.Color(
34
  name="thistle",
35
+ c50="#F9F5F9",
36
+ c100="#F0E8F1",
37
+ c200="#E7DBE8",
38
+ c300="#DECEE0",
39
+ c400="#D2BFD8",
40
+ c500="#D8BFD8", # Base color: Thistle
41
+ c600="#B59CB7",
42
+ c700="#927996",
43
+ c800="#6F5675",
44
+ c900="#4C3454",
45
+ c950="#291233",
46
  )
47
 
48
  colors.red_gray = colors.Color(
 
52
  c800="#732d2d", c900="#5f2626", c950="#4d2020",
53
  )
54
 
55
+ # 2. Create the new theme class using the Thistle palette
56
  class ThistleTheme(Soft):
57
  def __init__(
58
  self,
 
81
  background_fill_primary_dark="*primary_900",
82
  body_background_fill="linear-gradient(135deg, *primary_200, *primary_100)",
83
  body_background_fill_dark="linear-gradient(135deg, *primary_900, *primary_800)",
84
+ button_primary_text_color="black",
85
+ button_primary_text_color_hover="white",
86
+ button_primary_background_fill="linear-gradient(90deg, *secondary_400, *secondary_400)",
87
+ button_primary_background_fill_hover="linear-gradient(90deg, *secondary_600, *secondary_600)",
88
  button_primary_background_fill_dark="linear-gradient(90deg, *secondary_600, *secondary_800)",
89
  button_primary_background_fill_hover_dark="linear-gradient(90deg, *secondary_500, *secondary_500)",
90
  button_secondary_text_color="black",
 
318
  time.sleep(0.01)
319
  full_response += page_header + page_buffer + "\n\n"
320
 
321
+ # 3. New backend function for the "Caption" tab
322
  @spaces.GPU
323
+ def generate_caption(image: Image.Image, max_new_tokens: int = 1024, temperature: float = 0.6, top_p: float = 0.9, top_k: int = 50, repetition_penalty: float = 1.2):
 
 
 
324
  if image is None:
325
+ yield "Please upload an image to caption.", "Please upload an image to caption."
326
  return
327
 
328
+ system_prompt = (
329
+ "You are an AI assistant that rigorously follows this response protocol: For every input image, your primary "
330
+ "task is to write a precise caption that captures the essence of the image in clear, concise, and contextually "
331
+ "accurate language. Along with the caption, provide a structured set of attributes describing the visual "
332
+ "elements, including details such as objects, people, actions, colors, environment, mood, and other notable "
333
+ "characteristics. Ensure captions are precise, neutral, and descriptive, avoiding unnecessary elaboration or "
334
+ "subjective interpretation unless explicitly required. Do not reference the rules or instructions in the output; "
335
+ "only return the formatted caption, attributes, and class_name."
336
+ )
337
 
338
  messages = [{"role": "user", "content": [{"type": "image"}, {"type": "text", "text": system_prompt}]}]
339
  prompt_full = processor_q3vl.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
 
340
  inputs = processor_q3vl(text=[prompt_full], images=[image], return_tensors="pt", padding=True).to(device)
 
341
  streamer = TextIteratorStreamer(processor_q3vl, skip_prompt=True, skip_special_tokens=True)
342
+ generation_kwargs = {**inputs, "streamer": streamer, "max_new_tokens": max_new_tokens}
343
  thread = Thread(target=model_q3vl.generate, kwargs=generation_kwargs)
344
  thread.start()
 
345
  buffer = ""
346
  for new_text in streamer:
347
  buffer += new_text
348
  time.sleep(0.01)
349
+ yield buffer, buffer
350
 
351
  # --- Gradio Interface ---
352
  image_examples = [["Describe the safety measures in the image. Conclude (Safe / Unsafe)..", "images/5.jpg"], ["Convert this page to doc [markdown] precisely.", "images/3.png"]]
 
384
  page_info = gr.HTML('<div style="text-align:center;">No file loaded</div>')
385
  next_page_btn = gr.Button("Next ▶")
386
 
387
+ # 4. Add the new "Caption" tab to the UI
388
  with gr.TabItem("Caption"):
389
+ caption_image_upload = gr.Image(type="pil", label="Image to Caption", height=290)
390
  caption_submit = gr.Button("Generate Caption", variant="primary")
391
+
392
 
393
  with gr.Accordion("Advanced options", open=False):
394
  max_new_tokens = gr.Slider(label="Max new tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1, value=DEFAULT_MAX_NEW_TOKENS)
 
410
  pdf_upload.change(fn=load_and_preview_pdf, inputs=[pdf_upload], outputs=[pdf_preview_img, pdf_state, page_info])
411
  prev_page_btn.click(fn=lambda s: navigate_pdf_page("prev", s), inputs=[pdf_state], outputs=[pdf_preview_img, pdf_state, page_info])
412
  next_page_btn.click(fn=lambda s: navigate_pdf_page("next", s), inputs=[pdf_state], outputs=[pdf_preview_img, pdf_state, page_info])
413
+
414
+ # 5. Add the event handler for the new caption button
415
+ caption_submit.click(fn=generate_caption, inputs=[caption_image_upload, max_new_tokens, temperature, top_p, top_k, repetition_penalty], outputs=[output, markdown_output])
416
 
417
  if __name__ == "__main__":
418
  demo.queue(max_size=50).launch(mcp_server=True, ssr_mode=False, show_error=True)