Spaces:

dev2607
/

gemini-fastapi-wrapper

Build error

App Files Community

dev2607 committed on Mar 6, 2025

Commit

2634f59

verified ·

1 Parent(s): 7306baf

Update app.py

Browse files

Files changed (1) hide show

app.py +81 -43

app.py CHANGED Viewed

@@ -57,28 +57,77 @@ def extract_text_from_image(image):
         except (subprocess.SubprocessError, FileNotFoundError):
             return "Tesseract OCR is not installed or not properly configured. Please check installation."
-        text = pytesseract.image_to_string(image)
         if not text.strip():
             return "No text could be extracted. Ensure image is clear and readable."
         return text
     except Exception as e:
         return f"Error extracting text: {str(e)}"
 # Function to parse ingredients from text
 def parse_ingredients(text):
-    # Basic parsing - split by commas, semicolons, and line breaks
     if not text:
         return []
-    # Clean up the text - remove "Ingredients:" prefix if present
     text = re.sub(r'^ingredients:?\s*', '', text.lower(), flags=re.IGNORECASE)
     # Split by common ingredient separators
     ingredients = re.split(r',|;|\n', text)
-    ingredients = [i.strip().lower() for i in ingredients if i.strip()]
-    return ingredients
 # Function to analyze ingredients with Gemini
 # Function to analyze ingredients with Gemini
@@ -120,49 +169,39 @@ def analyze_ingredients_with_gemini(ingredients_list, health_conditions=None):
         """
     try:
-        # Call the Gemini API
         try:
-            model = genai.GenerativeModel('gemini-pro')
             response = model.generate_content(prompt)
             # Check if response is valid
             if hasattr(response, 'text') and response.text:
                 analysis = response.text
             else:
-                # Fall back to alternative model if available
-                try:
-                    models = genai.list_models()
-                    available_models = [m.name for m in models]
-                    if 'gemini-1.0-pro' in available_models:
-                        model = genai.GenerativeModel('gemini-1.0-pro')
-                    elif 'gemini-1.5-pro' in available_models:
-                        model = genai.GenerativeModel('gemini-1.5-pro')
-                    else:
-                        # If no alternative model is available, use dummy analysis
-                        return dummy_analyze(ingredients_list, health_conditions) + "\n\n(Using fallback analysis due to API model availability issues)"
-                    response = model.generate_content(prompt)
-                    analysis = response.text if hasattr(response, 'text') else "Error: Received empty response"
-                except Exception as model_e:
-                    return dummy_analyze(ingredients_list, health_conditions) + f"\n\n(Using fallback analysis: {str(model_e)})"
         except Exception as e:
-            if "404 models/gemini-pro is not found" in str(e):
-                # Try listing available models and use an alternative if possible
-                try:
-                    models = genai.list_models()
-                    available_models = [m.name for m in models]
-                    if not available_models:
-                        return dummy_analyze(ingredients_list, health_conditions) + "\n\n(Using fallback analysis due to API model availability issues)"
-                    # Use first available model
-                    model = genai.GenerativeModel(available_models[0])
-                    response = model.generate_content(prompt)
-                    analysis = response.text if hasattr(response, 'text') else "Error: Received empty response"
-                except Exception as model_e:
-                    return dummy_analyze(ingredients_list, health_conditions) + f"\n\n(Using fallback analysis: {str(model_e)})"
-            else:
-                # Handle other exceptions
-                return dummy_analyze(ingredients_list, health_conditions) + f"\n\n(Using fallback analysis: {str(e)})"
         # Add disclaimer
         disclaimer = """
@@ -176,7 +215,6 @@ def analyze_ingredients_with_gemini(ingredients_list, health_conditions=None):
     except Exception as e:
         # Fallback to basic analysis if API call fails
         return dummy_analyze(ingredients_list, health_conditions) + f"\n\n(Using fallback analysis: {str(e)})"
 # Dummy analysis function for when API is not available
 def dummy_analyze(ingredients_list, health_conditions=None):
     ingredients_text = ", ".join(ingredients_list)

         except (subprocess.SubprocessError, FileNotFoundError):
             return "Tesseract OCR is not installed or not properly configured. Please check installation."
+        # Image preprocessing for better OCR
+        import cv2
+        import numpy as np
+        # Convert PIL image to OpenCV format
+        img_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
+        # Convert to grayscale
+        gray = cv2.cvtColor(img_cv, cv2.COLOR_BGR2GRAY)
+        # Apply thresholding to get black and white image
+        _, binary = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
+        # Noise removal
+        kernel = np.ones((1, 1), np.uint8)
+        binary = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)
+        # Dilate to connect text
+        binary = cv2.dilate(binary, kernel, iterations=1)
+        # Convert back to PIL image for tesseract
+        binary_pil = Image.fromarray(cv2.bitwise_not(binary))
+        # Run OCR with improved configuration
+        custom_config = r'--oem 3 --psm 6 -l eng'
+        text = pytesseract.image_to_string(binary_pil, config=custom_config)
+        if not text.strip():
+            # Try original image as fallback
+            text = pytesseract.image_to_string(image, config=custom_config)
         if not text.strip():
             return "No text could be extracted. Ensure image is clear and readable."
         return text
     except Exception as e:
         return f"Error extracting text: {str(e)}"
 # Function to parse ingredients from text
 def parse_ingredients(text):
     if not text:
         return []
+    # Clean up the text
     text = re.sub(r'^ingredients:?\s*', '', text.lower(), flags=re.IGNORECASE)
+    # Remove common OCR errors and extraneous characters
+    text = re.sub(r'[|\\/@#$%^&*()_+=]', '', text)
+    # Replace common OCR errors
+    text = re.sub(r'\bngredients\b', 'ingredients', text)
+    # Handle common OCR misreads
+    replacements = {
+        '0': 'o', 'l': 'i', '1': 'i',
+        '5': 's', '8': 'b', 'Q': 'g',
+    }
+    for error, correction in replacements.items():
+        text = text.replace(error, correction)
     # Split by common ingredient separators
     ingredients = re.split(r',|;|\n', text)
+    # Clean up each ingredient
+    cleaned_ingredients = []
+    for i in ingredients:
+        i = i.strip().lower()
+        if i and len(i) > 1:  # Ignore single characters which are likely OCR errors
+            cleaned_ingredients.append(i)
+    return cleaned_ingredients
 # Function to analyze ingredients with Gemini
 # Function to analyze ingredients with Gemini
         """
     try:
+        # First, check available models
         try:
+            models = genai.list_models()
+            available_models = [m.name for m in models]
+            # Try models in order of preference
+            model_names = ['gemini-pro', 'gemini-1.5-pro', 'gemini-1.0-pro']
+            # Find first available model from our preference list
+            model_name = None
+            for name in model_names:
+                if any(name in m for m in available_models):
+                    model_name = name
+                    break
+            # If none of our preferred models are available, use the first available model
+            if not model_name and available_models:
+                model_name = available_models[0]
+            if not model_name:
+                return dummy_analyze(ingredients_list, health_conditions) + "\n\n(Using fallback analysis: No available models found)"
+            model = genai.GenerativeModel(model_name)
             response = model.generate_content(prompt)
             # Check if response is valid
             if hasattr(response, 'text') and response.text:
                 analysis = response.text
             else:
+                return dummy_analyze(ingredients_list, health_conditions) + "\n\n(Using fallback analysis: Empty API response)"
         except Exception as e:
+            return dummy_analyze(ingredients_list, health_conditions) + f"\n\n(Using fallback analysis: {str(e)})"
         # Add disclaimer
         disclaimer = """
     except Exception as e:
         # Fallback to basic analysis if API call fails
         return dummy_analyze(ingredients_list, health_conditions) + f"\n\n(Using fallback analysis: {str(e)})"
 # Dummy analysis function for when API is not available
 def dummy_analyze(ingredients_list, health_conditions=None):
     ingredients_text = ", ".join(ingredients_list)