Update app.py
app.py CHANGED
@@ -119,44 +119,51 @@ def llm_chat_response(text, image_base64=None):
     HF_TOKEN = os.getenv("HF_TOKEN")
     client = InferenceClient(api_key=HF_TOKEN)

-
-            {
-                "type": "text",
-                "text": text + str('describe in one line only')
-            }
-        ]
-
-    # If image_base64 is provided, add it to the message content
+    # Create a proper conversational format as required by the API
     if image_base64:
-        #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        # For image + text, we need to use the conversation format
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "text",
+                        "text": text if text else "Describe what you see in the image"
+                    },
+                    {
+                        "type": "image",
+                        "image": {
+                            "data": image_base64
+                        }
+                    }
+                ]
+            }
+        ]
+    else:
+        # Text only
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "text",
+                        "text": text + " Describe in one line only."
+                    }
+                ]
+            }
+        ]

-
-
-
-
-
-
+    try:
+        response_from_llama = client.chat.completions.create(
+            model="meta-llama/Llama-3.2-11B-Vision-Instruct",
+            messages=messages,
+            max_tokens=500
+        )
+        return response_from_llama.choices[0].message['content']
+    except Exception as e:
+        print(f"Error calling LLM API: {e}")
+        # Fallback response in case of error
+        return "I couldn't process that image. Please try again with a different image or text query."

 app = FastAPI()
 # Initialize pipeline once at startup
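
As a quick sanity check of the new code path, a minimal usage sketch (assumptions: app.py exposes llm_chat_response as named in the hunk header, and test.jpg is a local image; both the import path and the file name are hypothetical, not part of this commit):

import base64

from app import llm_chat_response  # assumed import path; the hunk only shows the function body

# Read a local image and base64-encode it, matching the image_base64
# string the function threads into the "image" content part.
with open("test.jpg", "rb") as f:  # hypothetical test image
    image_base64 = base64.b64encode(f.read()).decode("utf-8")

# Image + text path: builds the two-part (text + image) user message.
print(llm_chat_response("What is in this picture?", image_base64=image_base64))

# Text-only path: the function appends " Describe in one line only." itself,
# so callers pass the bare prompt.
print(llm_chat_response("What does this service do?"))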
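
One hedged aside on the payload shape: OpenAI-compatible chat-completion endpoints commonly accept inline images as an "image_url" content part wrapping a base64 data URI, rather than the {"type": "image", "image": {"data": ...}} part this commit sends. If the request above were rejected, a sketch of that alternative shape (an illustration under that assumption, not taken from this commit; build_image_message is a hypothetical helper):

# Hypothetical alternative: wrap the base64 payload in a data URI inside an
# "image_url" content part, the shape OpenAI-style chat APIs commonly expect.
# The commit itself uses {"type": "image"}; this is an assumption, not a fix
# confirmed against the Hugging Face endpoint.
def build_image_message(text, image_base64):
    return [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": text},
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"},
                },
            ],
        }
    ]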