Spaces:

Pruthvi369i
/

ProjectExpo

Runtime error

App Files Community

Pruthvi369i committed on Mar 29, 2025

Commit

83ee74c

verified ·

1 Parent(s): a3b02f3

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -17

app.py CHANGED Viewed

@@ -27,30 +27,45 @@ def generate_response(image_file, prompt, max_new_tokens=512, temperature=0.7, t
         if image_file is not None:
             image = Image.open(image_file).convert('RGB')
-            # Process inputs
-            inputs = processor(
                 text=prompt,
                 images=image,
                 return_tensors="pt"
             ).to(model.device)
         else:
             # Text-only input
             inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-        # Generate response
-        with torch.no_grad():
-            outputs = model.generate(
-                **inputs,
-                max_new_tokens=max_new_tokens,
-                temperature=temperature,
-                top_p=top_p,
-                do_sample=True
-            )
-        # Decode and return the response
-        if image_file is not None:
-            response = processor.decode(outputs[0], skip_special_tokens=True)
-        else:
             response = tokenizer.decode(outputs[0], skip_special_tokens=True)
         # Remove the input prompt from the response

         if image_file is not None:
             image = Image.open(image_file).convert('RGB')
+            # Process inputs with processor to get the right format
+            processed_inputs = processor(
                 text=prompt,
                 images=image,
                 return_tensors="pt"
             ).to(model.device)
+            # Extract only the input_ids for generation
+            input_ids = processed_inputs.pop("input_ids")
+            # Generate response
+            with torch.no_grad():
+                outputs = model.generate(
+                    input_ids=input_ids,
+                    attention_mask=processed_inputs.get("attention_mask", None),
+                    max_new_tokens=max_new_tokens,
+                    temperature=temperature,
+                    top_p=top_p,
+                    do_sample=True
+                )
+            # Decode and return the response
+            response = tokenizer.decode(outputs[0], skip_special_tokens=True)
         else:
             # Text-only input
             inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+            # Generate response
+            with torch.no_grad():
+                outputs = model.generate(
+                    **inputs,
+                    max_new_tokens=max_new_tokens,
+                    temperature=temperature,
+                    top_p=top_p,
+                    do_sample=True
+                )
+            # Decode and return the response
             response = tokenizer.decode(outputs[0], skip_special_tokens=True)
         # Remove the input prompt from the response