Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -10,7 +10,6 @@ import io
|
|
| 10 |
from threading import Thread
|
| 11 |
from reportlab.lib.pagesizes import A4
|
| 12 |
from reportlab.lib.styles import getSampleStyleSheet
|
| 13 |
-
from reportlab.lib import colors
|
| 14 |
from reportlab.platypus import SimpleDocTemplate, Image as RLImage, Paragraph, Spacer
|
| 15 |
from reportlab.lib.units import inch
|
| 16 |
from reportlab.pdfbase import pdfmetrics
|
|
@@ -64,6 +63,21 @@ def identify_and_save_blob(blob_path):
|
|
| 64 |
except Exception as e:
|
| 65 |
raise ValueError(f"An error occurred while processing the file: {e}")
|
| 66 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
@spaces.GPU
|
| 68 |
def qwen_inference(model_name, media_input, text_input=None):
|
| 69 |
"""Handles inference for the selected model."""
|
|
@@ -72,7 +86,6 @@ def qwen_inference(model_name, media_input, text_input=None):
|
|
| 72 |
|
| 73 |
# Determine media type and obtain a file path if needed
|
| 74 |
if isinstance(media_input, str):
|
| 75 |
-
# If the input is a file path, check extension
|
| 76 |
media_path = media_input
|
| 77 |
if media_path.endswith(tuple(image_extensions.keys())):
|
| 78 |
media_type = "image"
|
|
@@ -83,13 +96,7 @@ def qwen_inference(model_name, media_input, text_input=None):
|
|
| 83 |
raise ValueError("Unsupported media type. Please upload a valid image.")
|
| 84 |
else:
|
| 85 |
# media_input is a PIL image (or numpy array) coming from gr.Image
|
| 86 |
-
|
| 87 |
-
# In case gr.Image returns a numpy array, convert it.
|
| 88 |
-
media_input = Image.fromarray(media_input)
|
| 89 |
-
# Save the image temporarily to disk
|
| 90 |
-
temp_filename = f"temp_{uuid.uuid4()}.png"
|
| 91 |
-
media_input.save(temp_filename)
|
| 92 |
-
media_path = temp_filename
|
| 93 |
media_type = "image"
|
| 94 |
|
| 95 |
messages = [
|
|
@@ -133,12 +140,13 @@ def qwen_inference(model_name, media_input, text_input=None):
|
|
| 133 |
|
| 134 |
def format_plain_text(output_text):
|
| 135 |
"""Formats the output text as plain text without LaTeX delimiters."""
|
| 136 |
-
# Remove LaTeX delimiters and convert to plain text
|
| 137 |
plain_text = output_text.replace("\\(", "").replace("\\)", "").replace("\\[", "").replace("\\]", "")
|
| 138 |
return plain_text
|
| 139 |
|
| 140 |
-
def generate_document(
|
| 141 |
"""Generates a document with the input image and plain text output."""
|
|
|
|
|
|
|
| 142 |
plain_text = format_plain_text(output_text)
|
| 143 |
if file_format == "pdf":
|
| 144 |
return generate_pdf(media_path, plain_text, font_choice, font_size, line_spacing, alignment, image_size)
|
|
@@ -248,7 +256,6 @@ with gr.Blocks(css=css) as demo:
|
|
| 248 |
gr.Markdown("# Qwen2VL: Compact Vision & Language Processing")
|
| 249 |
|
| 250 |
with gr.Tab(label="Image Input"):
|
| 251 |
-
|
| 252 |
with gr.Row():
|
| 253 |
with gr.Column():
|
| 254 |
model_choice = gr.Dropdown(
|
|
@@ -262,7 +269,6 @@ with gr.Blocks(css=css) as demo:
|
|
| 262 |
)
|
| 263 |
text_input = gr.Textbox(label="Question", placeholder="Ask a question about the image...")
|
| 264 |
submit_btn = gr.Button(value="Submit", elem_classes="submit-btn")
|
| 265 |
-
|
| 266 |
with gr.Column():
|
| 267 |
output_text = gr.Textbox(label="Output Text", lines=10)
|
| 268 |
plain_text_output = gr.Textbox(label="Standardized Plain Text", lines=10)
|
|
@@ -347,12 +353,12 @@ with gr.Blocks(css=css) as demo:
|
|
| 347 |
label="Image Size"
|
| 348 |
)
|
| 349 |
file_format = gr.Radio(["pdf", "docx"], label="File Format", value="pdf")
|
| 350 |
-
|
| 351 |
with gr.Row():
|
| 352 |
get_document_btn = gr.Button(value="Get Document", elem_classes="download-btn")
|
| 353 |
-
|
| 354 |
get_document_btn.click(
|
| 355 |
-
generate_document,
|
|
|
|
|
|
|
| 356 |
)
|
| 357 |
|
| 358 |
demo.launch(debug=True)
|
|
|
|
| 10 |
from threading import Thread
|
| 11 |
from reportlab.lib.pagesizes import A4
|
| 12 |
from reportlab.lib.styles import getSampleStyleSheet
|
|
|
|
| 13 |
from reportlab.platypus import SimpleDocTemplate, Image as RLImage, Paragraph, Spacer
|
| 14 |
from reportlab.lib.units import inch
|
| 15 |
from reportlab.pdfbase import pdfmetrics
|
|
|
|
| 63 |
except Exception as e:
|
| 64 |
raise ValueError(f"An error occurred while processing the file: {e}")
|
| 65 |
|
| 66 |
+
def get_media_file(media_input):
    """Return a filesystem path for ``media_input``.

    Args:
        media_input: Either a string, which is treated as an existing file
            path and returned unchanged, or an in-memory image. Anything
            that is not already a PIL ``Image.Image`` is passed through
            ``Image.fromarray`` (e.g. a numpy array).

    Returns:
        The path of the image file. For in-memory images this is a newly
        created PNG in the system temporary directory; nothing in this
        function deletes it, so cleanup is left to the caller.

    Raises:
        ValueError: If ``media_input`` is ``None`` (no image was supplied).
    """
    import os
    import tempfile

    if isinstance(media_input, str):
        return media_input  # Already a file path

    if media_input is None:
        # Fail with a clear message rather than an obscure error raised
        # from inside Image.fromarray when nothing was provided.
        raise ValueError("No media provided. Please upload a valid image.")

    if not isinstance(media_input, Image.Image):
        # Convert numpy array to PIL image if needed
        media_input = Image.fromarray(media_input)

    # mkstemp atomically reserves a unique file in the temp directory, so
    # the working directory is not littered with temp_*.png files.
    fd, temp_filename = tempfile.mkstemp(prefix="temp_", suffix=".png")
    os.close(fd)  # PIL reopens the path itself; release our handle first.
    media_input.save(temp_filename)
    return temp_filename
|
| 80 |
+
|
| 81 |
@spaces.GPU
|
| 82 |
def qwen_inference(model_name, media_input, text_input=None):
|
| 83 |
"""Handles inference for the selected model."""
|
|
|
|
| 86 |
|
| 87 |
# Determine media type and obtain a file path if needed
|
| 88 |
if isinstance(media_input, str):
|
|
|
|
| 89 |
media_path = media_input
|
| 90 |
if media_path.endswith(tuple(image_extensions.keys())):
|
| 91 |
media_type = "image"
|
|
|
|
| 96 |
raise ValueError("Unsupported media type. Please upload a valid image.")
|
| 97 |
else:
|
| 98 |
# media_input is a PIL image (or numpy array) coming from gr.Image
|
| 99 |
+
media_path = get_media_file(media_input)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
media_type = "image"
|
| 101 |
|
| 102 |
messages = [
|
|
|
|
| 140 |
|
| 141 |
def format_plain_text(output_text):
    r"""Format the output text as plain text without LaTeX delimiters.

    Removes every occurrence of the delimiters ``\(``, ``\)``, ``\[`` and
    ``\]``; the text between them is kept unchanged.

    Args:
        output_text: Text to clean. ``None`` or an empty string yields
            an empty string instead of raising.

    Returns:
        The text with the four delimiters removed.
    """
    if not output_text:
        # Nothing to clean; also guards against None, which has no
        # ``replace`` method.
        return ""

    plain_text = output_text
    for delimiter in ("\\(", "\\)", "\\[", "\\]"):
        plain_text = plain_text.replace(delimiter, "")
    return plain_text
|
| 145 |
|
| 146 |
+
def generate_document(media_input, output_text, file_format, font_choice, font_size, line_spacing, alignment, image_size):
|
| 147 |
"""Generates a document with the input image and plain text output."""
|
| 148 |
+
# Ensure media_input is a file path.
|
| 149 |
+
media_path = get_media_file(media_input)
|
| 150 |
plain_text = format_plain_text(output_text)
|
| 151 |
if file_format == "pdf":
|
| 152 |
return generate_pdf(media_path, plain_text, font_choice, font_size, line_spacing, alignment, image_size)
|
|
|
|
| 256 |
gr.Markdown("# Qwen2VL: Compact Vision & Language Processing")
|
| 257 |
|
| 258 |
with gr.Tab(label="Image Input"):
|
|
|
|
| 259 |
with gr.Row():
|
| 260 |
with gr.Column():
|
| 261 |
model_choice = gr.Dropdown(
|
|
|
|
| 269 |
)
|
| 270 |
text_input = gr.Textbox(label="Question", placeholder="Ask a question about the image...")
|
| 271 |
submit_btn = gr.Button(value="Submit", elem_classes="submit-btn")
|
|
|
|
| 272 |
with gr.Column():
|
| 273 |
output_text = gr.Textbox(label="Output Text", lines=10)
|
| 274 |
plain_text_output = gr.Textbox(label="Standardized Plain Text", lines=10)
|
|
|
|
| 353 |
label="Image Size"
|
| 354 |
)
|
| 355 |
file_format = gr.Radio(["pdf", "docx"], label="File Format", value="pdf")
|
|
|
|
| 356 |
with gr.Row():
|
| 357 |
get_document_btn = gr.Button(value="Get Document", elem_classes="download-btn")
|
|
|
|
| 358 |
get_document_btn.click(
|
| 359 |
+
generate_document,
|
| 360 |
+
[input_media, output_text, file_format, font_choice, font_size, line_spacing, alignment, image_size],
|
| 361 |
+
gr.File(label="Download Document")
|
| 362 |
)
|
| 363 |
|
| 364 |
demo.launch(debug=True)
|