Matan Kriel committed on
Commit
5d71831
·
1 Parent(s): d10e213

app and requirements fix

Browse files
Files changed (1) hide show
  1. app.py +25 -156
app.py CHANGED
@@ -2,41 +2,26 @@
2
  import gradio as gr
3
  import pandas as pd
4
  import numpy as np
5
- import glob
6
  import os
 
7
  from deepface import DeepFace
8
- from PIL import Image
9
  from sklearn.metrics.pairwise import cosine_similarity
10
 
11
  # --- 1. Load the Knowledge Base (Specific Target) ---
12
- # We prioritize your specific file, but keep a fallback just in case
13
  TARGET_DB = "famous_faces_ArcFace_standalone.parquet"
14
 
15
  if os.path.exists(TARGET_DB):
16
  DB_PATH = TARGET_DB
17
- else:
18
- # Fallback: Find any parquet file if the specific one is missing
19
- parquet_files = glob.glob("famous_faces_*.parquet")
20
- if parquet_files:
21
- # Sort by modification time to get the newest one
22
- parquet_files.sort(key=os.path.getmtime, reverse=True)
23
- DB_PATH = parquet_files[0]
24
- else:
25
- DB_PATH = None
26
-
27
- if DB_PATH:
28
  print(f"📂 Loaded Knowledge Base: {DB_PATH}")
29
  df_db = pd.read_parquet(DB_PATH)
30
  print(f"📊 Database columns: {df_db.columns.tolist()}")
31
  print(f"📊 Database shape: {df_db.shape}")
 
32
  # Convert embedding column to a clean numpy matrix for fast math
33
  DB_VECTORS = np.stack(df_db['embedding'].values)
34
 
35
  # Identify Model Name from filename
36
- # e.g. "famous_faces_GhostFaceNet.parquet" -> "GhostFaceNet"
37
- # Identify Model Name from filename
38
- # e.g. "famous_faces_GhostFaceNet.parquet" -> "GhostFaceNet"
39
- # e.g. "famous_faces_ArcFace_standalone.parquet" -> "ArcFace"
40
  filename_no_ext = os.path.basename(DB_PATH).replace(".parquet", "")
41
  parts = filename_no_ext.split("_")
42
 
@@ -46,96 +31,24 @@ if DB_PATH:
46
  MODEL_NAME = parts[-1]
47
  print(f"⚙️ Model configured: {MODEL_NAME}")
48
 
49
- # Debug: Check what directories exist
50
- print(f"🔍 Checking available directories...")
51
- if os.path.exists("my_dataset"):
52
- files_in_dataset = os.listdir("my_dataset")[:5] # Show first 5
53
- print(f" Found 'my_dataset' directory with {len(os.listdir('my_dataset'))} files")
54
- print(f" Sample files: {files_in_dataset}")
55
- else:
56
- print(f" 'my_dataset' directory not found")
57
-
58
- # Debug: Show sample image paths from database
59
- if 'image_path' in df_db.columns:
60
- sample_paths = df_db['image_path'].head(3).tolist()
61
- print(f"📷 Sample image paths from DB: {sample_paths}")
62
  else:
63
- print("❌ CRITICAL: No Parquet file found! Please run the Model Battle step.")
 
64
  DB_VECTORS = None
65
  MODEL_NAME = "Unknown"
 
66
 
67
- # --- 2. Helper function to load images with fallbacks ---
68
- def load_image_with_fallback(image_path, name):
69
- """
70
- Try to load image from various possible locations, or create placeholder.
71
-
72
- The parquet file stores image_path (e.g., 'my_dataset/Sahar_Milis.png'),
73
- but we need to find where the actual image files are located.
74
- """
75
- tried_paths = []
76
-
77
- # Strategy 1: Try original path as-is
78
- if os.path.exists(image_path):
79
- try:
80
- return Image.open(image_path), image_path
81
- except Exception as e:
82
- tried_paths.append(f"{image_path} (error: {e})")
83
- else:
84
- tried_paths.append(f"{image_path} (not found)")
85
-
86
- # Strategy 2: Try just the filename in current directory
87
- filename = os.path.basename(image_path)
88
- if os.path.exists(filename):
89
- try:
90
- return Image.open(filename), filename
91
- except Exception as e:
92
- tried_paths.append(f"{filename} (error: {e})")
93
- else:
94
- tried_paths.append(f"{filename} (not found)")
95
-
96
- # Strategy 3: Try in my_dataset directory (remove my_dataset/ prefix if present)
97
- if image_path.startswith("my_dataset/"):
98
- # Already has prefix, try as-is
99
- dataset_path = image_path
100
- else:
101
- # Add prefix
102
- dataset_path = os.path.join("my_dataset", filename)
103
-
104
- if os.path.exists(dataset_path):
105
- try:
106
- return Image.open(dataset_path), dataset_path
107
- except Exception as e:
108
- tried_paths.append(f"{dataset_path} (error: {e})")
109
- else:
110
- tried_paths.append(f"{dataset_path} (not found)")
111
-
112
- # Strategy 4: Try searching in current directory recursively
113
- import glob
114
- search_patterns = [
115
- f"**/{filename}",
116
- f"**/*{filename}",
117
- f"**/{name.replace(' ', '_')}*",
118
- f"**/{name.replace(' ', '-')}*",
119
- ]
120
- for pattern in search_patterns:
121
- matches = glob.glob(pattern, recursive=True)
122
- if matches:
123
- try:
124
- return Image.open(matches[0]), matches[0]
125
- except:
126
- continue
127
-
128
- # Strategy 5: Create a placeholder image with name
129
  placeholder = Image.new('RGB', (200, 200), color=(220, 220, 220))
130
- from PIL import ImageDraw, ImageFont
131
  draw = ImageDraw.Draw(placeholder)
132
- # Try to use default font, fallback to basic if not available
133
  try:
134
  font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", 16)
135
  except:
136
  font = ImageFont.load_default()
137
 
138
- # Draw name and "Not Found" text
139
  text_lines = [name[:15], "Image", "Not Found"]
140
  y_offset = 50
141
  for line in text_lines:
@@ -144,11 +57,8 @@ def load_image_with_fallback(image_path, name):
144
  position = ((200 - text_width) // 2, y_offset)
145
  draw.text(position, line, fill=(100, 100, 100), font=font)
146
  y_offset += 30
147
-
148
- print(f"⚠️ Could not find image for {name}. Tried: {tried_paths[:3]}")
149
- return placeholder, None
150
 
151
- # --- 3. Define the Search Logic ---
152
  def find_best_matches(user_image):
153
  # Error handling for empty inputs
154
  if user_image is None:
@@ -184,63 +94,22 @@ def find_best_matches(user_image):
184
  display_name = f"{row['name']} (Match: {int(score*100)}%)"
185
  result_text += f"### #{i}: {display_name}\n\n"
186
 
187
- # Load Image - prioritize bytes from parquet, then try file paths
188
- try:
189
- img = None
190
- found_path = None
191
-
192
- # Strategy 1: Check if image is stored as bytes in parquet (BEST - self-contained)
193
- if 'image_bytes' in df_db.columns and row.get('image_bytes') is not None:
194
- try:
195
- import io
196
- img = Image.open(io.BytesIO(row['image_bytes']))
197
- found_path = "parquet (embedded)"
198
- except Exception as e:
199
- print(f"⚠️ Could not load image bytes for {row['name']}: {e}")
200
-
201
- # Strategy 2: Try loading from file path (fallback)
202
- if img is None and 'image_path' in df_db.columns:
203
- img, found_path = load_image_with_fallback(row['image_path'], row['name'])
204
-
205
- # Strategy 3: Create placeholder if still no image
206
- if img is None:
207
- placeholder = Image.new('RGB', (200, 200), color=(220, 220, 220))
208
- from PIL import ImageDraw, ImageFont
209
- draw = ImageDraw.Draw(placeholder)
210
- try:
211
- font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", 16)
212
- except:
213
- font = ImageFont.load_default()
214
- text_lines = [row['name'][:15], "Image", "Not Found"]
215
- y_offset = 50
216
- for line in text_lines:
217
- bbox = draw.textbbox((0, 0), line, font=font)
218
- text_width = bbox[2] - bbox[0]
219
- position = ((200 - text_width) // 2, y_offset)
220
- draw.text(position, line, fill=(100, 100, 100), font=font)
221
- y_offset += 30
222
- img = placeholder
223
- found_path = None
224
-
225
- gallery_images.append((img, display_name))
226
-
227
- # Add status message
228
- if found_path == "parquet (embedded)":
229
  result_text += f"✓ Image loaded from parquet\n\n"
230
- elif found_path:
231
- result_text += f"βœ“ Found image at: {found_path}\n\n"
232
- else:
233
- result_text += f"⚠️ Image not found for {row['name']}\n\n"
234
-
235
- except Exception as img_error:
236
- # Even if loading fails, create placeholder
237
- placeholder = Image.new('RGB', (200, 200), color=(220, 220, 220))
238
- gallery_images.append((placeholder, display_name))
239
- result_text += f"⚠️ Error loading image: {str(img_error)}\n\n"
240
 
241
- # Don't pad with None - Gallery can't handle None images
242
- # Just return what we have
243
-
244
  return gallery_images, result_text
245
 
246
  except Exception as e:
 
2
  import gradio as gr
3
  import pandas as pd
4
  import numpy as np
 
5
  import os
6
+ import io
7
  from deepface import DeepFace
8
+ from PIL import Image, ImageDraw, ImageFont
9
  from sklearn.metrics.pairwise import cosine_similarity
10
 
11
  # --- 1. Load the Knowledge Base (Specific Target) ---
 
12
  TARGET_DB = "famous_faces_ArcFace_standalone.parquet"
13
 
14
  if os.path.exists(TARGET_DB):
15
  DB_PATH = TARGET_DB
 
 
 
 
 
 
 
 
 
 
 
16
  print(f"📂 Loaded Knowledge Base: {DB_PATH}")
17
  df_db = pd.read_parquet(DB_PATH)
18
  print(f"📊 Database columns: {df_db.columns.tolist()}")
19
  print(f"📊 Database shape: {df_db.shape}")
20
+
21
  # Convert embedding column to a clean numpy matrix for fast math
22
  DB_VECTORS = np.stack(df_db['embedding'].values)
23
 
24
  # Identify Model Name from filename
 
 
 
 
25
  filename_no_ext = os.path.basename(DB_PATH).replace(".parquet", "")
26
  parts = filename_no_ext.split("_")
27
 
 
31
  MODEL_NAME = parts[-1]
32
  print(f"⚙️ Model configured: {MODEL_NAME}")
33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  else:
35
+ print("❌ CRITICAL: Parquet file not found!")
36
+ DB_PATH = None
37
  DB_VECTORS = None
38
  MODEL_NAME = "Unknown"
39
+ df_db = None
40
 
41
+
42
+ # --- 2. Define the Search Logic ---
43
+ def create_placeholder(name):
44
+ """Creates a placeholder image with the name if the actual image is missing."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  placeholder = Image.new('RGB', (200, 200), color=(220, 220, 220))
 
46
  draw = ImageDraw.Draw(placeholder)
 
47
  try:
48
  font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", 16)
49
  except:
50
  font = ImageFont.load_default()
51
 
 
52
  text_lines = [name[:15], "Image", "Not Found"]
53
  y_offset = 50
54
  for line in text_lines:
 
57
  position = ((200 - text_width) // 2, y_offset)
58
  draw.text(position, line, fill=(100, 100, 100), font=font)
59
  y_offset += 30
60
+ return placeholder
 
 
61
 
 
62
  def find_best_matches(user_image):
63
  # Error handling for empty inputs
64
  if user_image is None:
 
94
  display_name = f"{row['name']} (Match: {int(score*100)}%)"
95
  result_text += f"### #{i}: {display_name}\n\n"
96
 
97
+ # Load Image from Bytes (Parquet)
98
+ img = None
99
+ if 'image_bytes' in df_db.columns and row.get('image_bytes') is not None:
100
+ try:
101
+ img = Image.open(io.BytesIO(row['image_bytes']))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  result_text += f"✓ Image loaded from parquet\n\n"
103
+ except Exception as e:
104
+ print(f"⚠️ Could not load image bytes for {row['name']}: {e}")
105
+
106
+ # Fallback to placeholder
107
+ if img is None:
108
+ img = create_placeholder(row['name'])
109
+ result_text += f"⚠️ Image not found (Bytes missing)\n\n"
110
+
111
+ gallery_images.append((img, display_name))
 
112
 
 
 
 
113
  return gallery_images, result_text
114
 
115
  except Exception as e: