Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -30,7 +30,6 @@ def resolve_model(chosen, custom):
|
|
| 30 |
return chosen
|
| 31 |
|
| 32 |
# --- Main inference function ---
|
| 33 |
-
# If you are using ZeroGPU on Hugging Face Spaces, make sure to set the environment variable USE_GPU=1.
|
| 34 |
# The @spaces.GPU() decorator ensures that heavy inference runs on GPU in a ZeroGPU Space.
|
| 35 |
@spaces.GPU()
|
| 36 |
def compare_image_to_text_models(image, prompt, model1_choice, model1_custom, model2_choice, model2_custom):
|
|
@@ -42,18 +41,15 @@ def compare_image_to_text_models(image, prompt, model1_choice, model1_custom, mo
|
|
| 42 |
device = 0 if os.environ.get("USE_GPU", "0") == "1" else -1
|
| 43 |
|
| 44 |
# Create pipelines for image-to-text.
|
| 45 |
-
#
|
| 46 |
-
# We use the "image-to-text" task here so that the prompt is taken into account.
|
| 47 |
pipe1 = pipeline("image-to-text", model=model1_name, device=device)
|
| 48 |
pipe2 = pipeline("image-to-text", model=model2_name, device=device)
|
| 49 |
|
| 50 |
# Run inference on the image with the provided prompt.
|
| 51 |
-
# Depending on the model, the call signature may vary; here we assume a simple call with (image, prompt).
|
| 52 |
output1 = pipe1(image, prompt)
|
| 53 |
output2 = pipe2(image, prompt)
|
| 54 |
|
| 55 |
# Extract the generated text.
|
| 56 |
-
# (Many pipelines return a list of dicts with key 'generated_text'; if not, we simply convert the output to a string.)
|
| 57 |
def extract_text(output):
|
| 58 |
if isinstance(output, list) and len(output) > 0 and isinstance(output[0], dict) and "generated_text" in output[0]:
|
| 59 |
return output[0]["generated_text"]
|
|
@@ -70,7 +66,6 @@ def compare_image_to_text_models(image, prompt, model1_choice, model1_custom, mo
|
|
| 70 |
return chat1, chat2
|
| 71 |
|
| 72 |
# --- Build the Gradio interface ---
|
| 73 |
-
# Pre-populated sample prompt.
|
| 74 |
sample_prompt = "Describe the image in explicit detail. Return a nested JSON object in response."
|
| 75 |
|
| 76 |
with gr.Blocks(title="Image Text-to-Text Comparison Tool") as demo:
|
|
|
|
| 30 |
return chosen
|
| 31 |
|
| 32 |
# --- Main inference function ---
|
|
|
|
| 33 |
# The @spaces.GPU() decorator ensures that heavy inference runs on GPU in a ZeroGPU Space.
|
| 34 |
@spaces.GPU()
|
| 35 |
def compare_image_to_text_models(image, prompt, model1_choice, model1_custom, model2_choice, model2_custom):
|
|
|
|
| 41 |
device = 0 if os.environ.get("USE_GPU", "0") == "1" else -1
|
| 42 |
|
| 43 |
# Create pipelines for image-to-text.
|
| 44 |
+
# These models should support a call signature of (image, prompt)
|
|
|
|
| 45 |
pipe1 = pipeline("image-to-text", model=model1_name, device=device)
|
| 46 |
pipe2 = pipeline("image-to-text", model=model2_name, device=device)
|
| 47 |
|
| 48 |
# Run inference on the image with the provided prompt.
|
|
|
|
| 49 |
output1 = pipe1(image, prompt)
|
| 50 |
output2 = pipe2(image, prompt)
|
| 51 |
|
| 52 |
# Extract the generated text.
|
|
|
|
| 53 |
def extract_text(output):
|
| 54 |
if isinstance(output, list) and len(output) > 0 and isinstance(output[0], dict) and "generated_text" in output[0]:
|
| 55 |
return output[0]["generated_text"]
|
|
|
|
| 66 |
return chat1, chat2
|
| 67 |
|
| 68 |
# --- Build the Gradio interface ---
|
|
|
|
| 69 |
sample_prompt = "Describe the image in explicit detail. Return a nested JSON object in response."
|
| 70 |
|
| 71 |
with gr.Blocks(title="Image Text-to-Text Comparison Tool") as demo:
|