Update app.py
app.py CHANGED
@@ -1,3 +1,158 @@
+# import gradio as gr
+# import spaces
+# from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
+# from qwen_vl_utils import process_vision_info
+# from PIL import Image
+# from datetime import datetime
+# import os
+
+# # subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
+
+# DESCRIPTION = "[Sparrow Qwen2-VL-7B Backend](https://github.com/katanaml/sparrow)"
+
+
+# def array_to_image_path(image_filepath, max_width=1250, max_height=1750):
+#     if image_filepath is None:
+#         raise ValueError("No image provided. Please upload an image before submitting.")
+
+#     # Open the uploaded image using its filepath
+#     img = Image.open(image_filepath)
+
+#     # Extract the file extension from the uploaded file
+#     input_image_extension = image_filepath.split('.')[-1].lower() # Extract extension from filepath
+
+#     # Set file extension based on the original file, otherwise default to PNG
+#     if input_image_extension in ['jpg', 'jpeg', 'png']:
+#         file_extension = input_image_extension
+#     else:
+#         file_extension = 'png' # Default to PNG if extension is unavailable or invalid
+
+#     # Get the current dimensions of the image
+#     width, height = img.size
+
+#     # Initialize new dimensions to current size
+#     new_width, new_height = width, height
+
+#     # Check if the image exceeds the maximum dimensions
+#     if width > max_width or height > max_height:
+#         # Calculate the new size, maintaining the aspect ratio
+#         aspect_ratio = width / height
+
+#         if width > max_width:
+#             new_width = max_width
+#             new_height = int(new_width / aspect_ratio)
+
+#         if new_height > max_height:
+#             new_height = max_height
+#             new_width = int(new_height * aspect_ratio)
+
+#     # Generate a unique filename using timestamp
+#     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+#     filename = f"image_{timestamp}.{file_extension}"
+
+#     # Save the image
+#     img.save(filename)
+
+#     # Get the full path of the saved image
+#     full_path = os.path.abspath(filename)
+
+#     return full_path, new_width, new_height
+
+
+# # Initialize the model and processor globally to optimize performance
+# model = Qwen2VLForConditionalGeneration.from_pretrained(
+#     "Qwen/Qwen2-VL-7B-Instruct",
+#     torch_dtype="auto",
+#     device_map="auto"
+# )
+
+# processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")
+
+
+# @spaces.GPU
+# def run_inference(input_imgs, text_input):
+#     results = []
+
+#     for image in input_imgs:
+#         # Convert each image to the required format
+#         image_path, width, height = array_to_image_path(image)
+
+#         try:
+#             # Prepare messages for each image
+#             messages = [
+#                 {
+#                     "role": "user",
+#                     "content": [
+#                         {
+#                             "type": "image",
+#                             "image": image_path,
+#                             "resized_height": height,
+#                             "resized_width": width
+#                         },
+#                         {
+#                             "type": "text",
+#                             "text": text_input
+#                         }
+#                     ]
+#                 }
+#             ]
+
+#             # Prepare inputs for the model
+#             text = processor.apply_chat_template(
+#                 messages, tokenize=False, add_generation_prompt=True
+#             )
+
+#             image_inputs, video_inputs = process_vision_info(messages)
+#             inputs = processor(
+#                 text=[text],
+#                 images=image_inputs,
+#                 videos=video_inputs,
+#                 padding=True,
+#                 return_tensors="pt",
+#             )
+#             inputs = inputs.to("cuda")
+
+#             # Generate inference output
+#             generated_ids = model.generate(**inputs, max_new_tokens=4096)
+#             generated_ids_trimmed = [
+#                 out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+#             ]
+#             raw_output = processor.batch_decode(
+#                 generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=True
+#             )
+
+#             results.append(raw_output[0])
+#             print("Processed: " + image)
+#         finally:
+#             # Clean up the temporary image file
+#             os.remove(image_path)
+
+#     return results
+
+
+# css = """
+# #output {
+#     height: 500px;
+#     overflow: auto;
+#     border: 1px solid #ccc;
+# }
+# """
+
+# with gr.Blocks(css=css) as demo:
+#     gr.Markdown(DESCRIPTION)
+#     with gr.Tab(label="Qwen2-VL-7B Input"):
+#         with gr.Row():
+#             with gr.Column():
+#                 input_imgs = gr.Files(file_types=["image"], label="Upload Document Images")
+#                 text_input = gr.Textbox(label="Query")
+#                 submit_btn = gr.Button(value="Submit", variant="primary")
+#             with gr.Column():
+#                 output_text = gr.Textbox(label="Response")
+
+#     submit_btn.click(run_inference, [input_imgs, text_input], [output_text])
+
+# demo.queue(api_open=True)
+# demo.launch(debug=True)
 import gradio as gr
 import spaces
 from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
@@ -59,18 +214,39 @@ def array_to_image_path(image_filepath, max_width=1250, max_height=1750):
     return full_path, new_width, new_height


-# Initialize the model and processor globally to optimize performance
-model = Qwen2VLForConditionalGeneration.from_pretrained(
-    "Qwen/Qwen2-VL-7B-Instruct",
-    torch_dtype="auto",
-    device_map="auto"
-)
-
-processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")
+# FIX: do not initialize the model in the global scope.
+# Instead, it is loaded inside the function decorated with @spaces.GPU.
+
+# Global cache to avoid reloading the model on every call
+_model_cache = {}
+
+def get_model_and_processor():
+    """
+    Loads the model and processor once and caches them.
+    This function must ONLY be called inside a @spaces.GPU function.
+    """
+    if 'model' not in _model_cache:
+        print("Loading the Qwen2-VL-7B-Instruct model...")
+        _model_cache['model'] = Qwen2VLForConditionalGeneration.from_pretrained(
+            "Qwen/Qwen2-VL-7B-Instruct",
+            torch_dtype="auto",
+            device_map="auto"
+        )
+        _model_cache['processor'] = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")
+        print("Model loaded successfully!")
+
+    return _model_cache['model'], _model_cache['processor']


 @spaces.GPU
 def run_inference(input_imgs, text_input):
+    """
+    FIX: the model is now loaded HERE, inside the @spaces.GPU function.
+    This avoids the "CUDA must not be initialized in the main process" error.
+    """
+    # Load the model and processor inside the GPU function
+    model, processor = get_model_and_processor()
+
     results = []

     for image in input_imgs:
@@ -125,7 +301,8 @@ def run_inference(input_imgs, text_input):
             print("Processed: " + image)
         finally:
             # Clean up the temporary image file
-            os.remove(image_path)
+            if os.path.exists(image_path):
+                os.remove(image_path)

     return results

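A note on the pattern introduced above: on ZeroGPU Spaces, CUDA must not be initialized in the main process (the error quoted in the docstring), and a GPU is only attached while a function decorated with @spaces.GPU is running, so the model is loaded lazily inside that function and cached for reuse. Below is a minimal standalone sketch of the same lazy-load-and-cache pattern, assuming only the spaces and transformers packages; the model id and function names are illustrative, not taken from app.py.

import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer

_cache = {}

def _get_model_and_tokenizer():
    # First call pays the load cost; later calls in the same worker reuse the cache.
    if "model" not in _cache:
        _cache["model"] = AutoModelForCausalLM.from_pretrained(
            "Qwen/Qwen2-0.5B-Instruct",  # illustrative model id
            torch_dtype="auto",
            device_map="auto",
        )
        _cache["tokenizer"] = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
    return _cache["model"], _cache["tokenizer"]

@spaces.GPU
def generate(prompt):
    # CUDA is first touched here, inside the GPU-attached worker, never at import time.
    model, tokenizer = _get_model_and_tokenizer()
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    output_ids = model.generate(**inputs, max_new_tokens=64)
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)

Because the module-level dict outlives each request handled by the same worker, only the first call pays the download and load cost, which is the same trade-off the commit's _model_cache makes.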