dev2607 committed on
Commit
2634f59
·
verified ·
1 Parent(s): 7306baf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +81 -43
app.py CHANGED
@@ -57,28 +57,77 @@ def extract_text_from_image(image):
57
  except (subprocess.SubprocessError, FileNotFoundError):
58
  return "Tesseract OCR is not installed or not properly configured. Please check installation."
59
 
60
- text = pytesseract.image_to_string(image)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
 
 
 
 
62
  if not text.strip():
63
  return "No text could be extracted. Ensure image is clear and readable."
64
 
65
  return text
66
  except Exception as e:
67
  return f"Error extracting text: {str(e)}"
68
-
69
  # Function to parse ingredients from text
70
  def parse_ingredients(text):
71
- # Basic parsing - split by commas, semicolons, and line breaks
72
  if not text:
73
  return []
74
 
75
- # Clean up the text - remove "Ingredients:" prefix if present
76
  text = re.sub(r'^ingredients:?\s*', '', text.lower(), flags=re.IGNORECASE)
77
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  # Split by common ingredient separators
79
  ingredients = re.split(r',|;|\n', text)
80
- ingredients = [i.strip().lower() for i in ingredients if i.strip()]
81
- return ingredients
 
 
 
 
 
 
 
82
 
83
  # Function to analyze ingredients with Gemini
84
  # Function to analyze ingredients with Gemini
@@ -120,49 +169,39 @@ def analyze_ingredients_with_gemini(ingredients_list, health_conditions=None):
120
  """
121
 
122
  try:
123
- # Call the Gemini API
124
  try:
125
- model = genai.GenerativeModel('gemini-pro')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  response = model.generate_content(prompt)
127
 
128
  # Check if response is valid
129
  if hasattr(response, 'text') and response.text:
130
  analysis = response.text
131
  else:
132
- # Fall back to alternative model if available
133
- try:
134
- models = genai.list_models()
135
- available_models = [m.name for m in models]
136
- if 'gemini-1.0-pro' in available_models:
137
- model = genai.GenerativeModel('gemini-1.0-pro')
138
- elif 'gemini-1.5-pro' in available_models:
139
- model = genai.GenerativeModel('gemini-1.5-pro')
140
- else:
141
- # If no alternative model is available, use dummy analysis
142
- return dummy_analyze(ingredients_list, health_conditions) + "\n\n(Using fallback analysis due to API model availability issues)"
143
-
144
- response = model.generate_content(prompt)
145
- analysis = response.text if hasattr(response, 'text') else "Error: Received empty response"
146
- except Exception as model_e:
147
- return dummy_analyze(ingredients_list, health_conditions) + f"\n\n(Using fallback analysis: {str(model_e)})"
148
  except Exception as e:
149
- if "404 models/gemini-pro is not found" in str(e):
150
- # Try listing available models and use an alternative if possible
151
- try:
152
- models = genai.list_models()
153
- available_models = [m.name for m in models]
154
- if not available_models:
155
- return dummy_analyze(ingredients_list, health_conditions) + "\n\n(Using fallback analysis due to API model availability issues)"
156
-
157
- # Use first available model
158
- model = genai.GenerativeModel(available_models[0])
159
- response = model.generate_content(prompt)
160
- analysis = response.text if hasattr(response, 'text') else "Error: Received empty response"
161
- except Exception as model_e:
162
- return dummy_analyze(ingredients_list, health_conditions) + f"\n\n(Using fallback analysis: {str(model_e)})"
163
- else:
164
- # Handle other exceptions
165
- return dummy_analyze(ingredients_list, health_conditions) + f"\n\n(Using fallback analysis: {str(e)})"
166
 
167
  # Add disclaimer
168
  disclaimer = """
@@ -176,7 +215,6 @@ def analyze_ingredients_with_gemini(ingredients_list, health_conditions=None):
176
  except Exception as e:
177
  # Fallback to basic analysis if API call fails
178
  return dummy_analyze(ingredients_list, health_conditions) + f"\n\n(Using fallback analysis: {str(e)})"
179
-
180
  # Dummy analysis function for when API is not available
181
  def dummy_analyze(ingredients_list, health_conditions=None):
182
  ingredients_text = ", ".join(ingredients_list)
 
57
  except (subprocess.SubprocessError, FileNotFoundError):
58
  return "Tesseract OCR is not installed or not properly configured. Please check installation."
59
 
60
+ # Image preprocessing for better OCR
61
+ import cv2
62
+ import numpy as np
63
+
64
+ # Convert PIL image to OpenCV format
65
+ img_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
66
+
67
+ # Convert to grayscale
68
+ gray = cv2.cvtColor(img_cv, cv2.COLOR_BGR2GRAY)
69
+
70
+ # Apply thresholding to get black and white image
71
+ _, binary = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
72
+
73
+ # Noise removal
74
+ kernel = np.ones((1, 1), np.uint8)
75
+ binary = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)
76
+
77
+ # Dilate to connect text
78
+ binary = cv2.dilate(binary, kernel, iterations=1)
79
+
80
+ # Convert back to PIL image for tesseract
81
+ binary_pil = Image.fromarray(cv2.bitwise_not(binary))
82
+
83
+ # Run OCR with improved configuration
84
+ custom_config = r'--oem 3 --psm 6 -l eng'
85
+ text = pytesseract.image_to_string(binary_pil, config=custom_config)
86
 
87
+ if not text.strip():
88
+ # Try original image as fallback
89
+ text = pytesseract.image_to_string(image, config=custom_config)
90
+
91
  if not text.strip():
92
  return "No text could be extracted. Ensure image is clear and readable."
93
 
94
  return text
95
  except Exception as e:
96
  return f"Error extracting text: {str(e)}"
 
97
  # Function to parse ingredients from text
98
  def parse_ingredients(text):
 
99
  if not text:
100
  return []
101
 
102
+ # Clean up the text
103
  text = re.sub(r'^ingredients:?\s*', '', text.lower(), flags=re.IGNORECASE)
104
+
105
+ # Remove common OCR errors and extraneous characters
106
+ text = re.sub(r'[|\\/@#$%^&*()_+=]', '', text)
107
+
108
+ # Replace common OCR errors
109
+ text = re.sub(r'\bngredients\b', 'ingredients', text)
110
+
111
+ # Handle common OCR misreads
112
+ replacements = {
113
+ '0': 'o', 'l': 'i', '1': 'i',
114
+ '5': 's', '8': 'b', 'Q': 'g',
115
+ }
116
+
117
+ for error, correction in replacements.items():
118
+ text = text.replace(error, correction)
119
+
120
  # Split by common ingredient separators
121
  ingredients = re.split(r',|;|\n', text)
122
+
123
+ # Clean up each ingredient
124
+ cleaned_ingredients = []
125
+ for i in ingredients:
126
+ i = i.strip().lower()
127
+ if i and len(i) > 1: # Ignore single characters which are likely OCR errors
128
+ cleaned_ingredients.append(i)
129
+
130
+ return cleaned_ingredients
131
 
132
  # Function to analyze ingredients with Gemini
133
  # Function to analyze ingredients with Gemini
 
169
  """
170
 
171
  try:
172
+ # First, check available models
173
  try:
174
+ models = genai.list_models()
175
+ available_models = [m.name for m in models]
176
+
177
+ # Try models in order of preference
178
+ model_names = ['gemini-pro', 'gemini-1.5-pro', 'gemini-1.0-pro']
179
+
180
+ # Find first available model from our preference list
181
+ model_name = None
182
+ for name in model_names:
183
+ if any(name in m for m in available_models):
184
+ model_name = name
185
+ break
186
+
187
+ # If none of our preferred models are available, use the first available model
188
+ if not model_name and available_models:
189
+ model_name = available_models[0]
190
+
191
+ if not model_name:
192
+ return dummy_analyze(ingredients_list, health_conditions) + "\n\n(Using fallback analysis: No available models found)"
193
+
194
+ model = genai.GenerativeModel(model_name)
195
  response = model.generate_content(prompt)
196
 
197
  # Check if response is valid
198
  if hasattr(response, 'text') and response.text:
199
  analysis = response.text
200
  else:
201
+ return dummy_analyze(ingredients_list, health_conditions) + "\n\n(Using fallback analysis: Empty API response)"
202
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
203
  except Exception as e:
204
+ return dummy_analyze(ingredients_list, health_conditions) + f"\n\n(Using fallback analysis: {str(e)})"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
 
206
  # Add disclaimer
207
  disclaimer = """
 
215
  except Exception as e:
216
  # Fallback to basic analysis if API call fails
217
  return dummy_analyze(ingredients_list, health_conditions) + f"\n\n(Using fallback analysis: {str(e)})"
 
218
  # Dummy analysis function for when API is not available
219
  def dummy_analyze(ingredients_list, health_conditions=None):
220
  ingredients_text = ", ".join(ingredients_list)