Akıllı kelime filtreleme - spesifik kelime yok
Browse files
- Consonant/vowel oranına göre filtreleme
- Alphanumeric kodları koru
- 2-3 harfli product kodları tanı
- Tamamen genel algoritma
🤖 Generated with [Claude Code](https://claude.ai/code)
Co-Authored-By: Claude <noreply@anthropic.com>
app.py
CHANGED
|
@@ -53,18 +53,42 @@ def get_warehouse_stock(product_name):
|
|
| 53 |
query = normalize(product_name.strip()).replace('(2026)', '').replace('(2025)', '').strip()
|
| 54 |
words = query.split()
|
| 55 |
|
| 56 |
-
#
|
| 57 |
-
ignore_words = ['var', 'mi', 'mı', 'mu', 'mü', 'varmi', 'varmı', 'beden', 'size', 'boy',
|
| 58 |
-
'stok', 'stokta', 'mevcut', 'hangi', 'magazada', 'nerede', 'kaç', 'adet', 'tane',
|
| 59 |
-
'trek', 'bisiklet', 'bike']
|
| 60 |
-
|
| 61 |
-
# Find size
|
| 62 |
sizes = ['s', 'm', 'l', 'xl', 'xs', 'xxl', 'ml']
|
| 63 |
size = next((w for w in words if w in sizes), None)
|
| 64 |
|
| 65 |
-
#
|
| 66 |
-
|
| 67 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
|
| 69 |
print(f"DEBUG - Searching: {' '.join(product_words)}, Size: {size}")
|
| 70 |
|
|
|
|
| 53 |
query = normalize(product_name.strip()).replace('(2026)', '').replace('(2025)', '').strip()
|
| 54 |
words = query.split()
|
| 55 |
|
| 56 |
+
# Find size markers (S, M, L, etc.)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
sizes = ['s', 'm', 'l', 'xl', 'xs', 'xxl', 'ml']
|
| 58 |
size = next((w for w in words if w in sizes), None)
|
| 59 |
|
| 60 |
+
# Smart filtering: Keep only meaningful product identifiers
|
| 61 |
+
product_words = []
|
| 62 |
+
|
| 63 |
+
# First pass: identify what looks like product terms
|
| 64 |
+
for word in words:
|
| 65 |
+
# Skip if it's a size marker
|
| 66 |
+
if word in sizes:
|
| 67 |
+
continue
|
| 68 |
+
|
| 69 |
+
# Always keep numbers (model numbers like 6, 7, 8)
|
| 70 |
+
if word.isdigit():
|
| 71 |
+
product_words.append(word)
|
| 72 |
+
|
| 73 |
+
# Keep alphanumeric codes (like "sl6", "gen8")
|
| 74 |
+
elif any(c.isdigit() for c in word) and any(c.isalpha() for c in word):
|
| 75 |
+
product_words.append(word)
|
| 76 |
+
|
| 77 |
+
# Keep 2-3 letter codes (often product codes like "sl", "slr", "emx")
|
| 78 |
+
elif 2 <= len(word) <= 3 and word.isalpha():
|
| 79 |
+
# Check if it has consonants (likely a code, not a particle)
|
| 80 |
+
if any(c not in 'aeiou' for c in word):
|
| 81 |
+
product_words.append(word)
|
| 82 |
+
|
| 83 |
+
# Keep longer words that have good consonant/vowel mix (likely product names)
|
| 84 |
+
elif len(word) > 3:
|
| 85 |
+
# Calculate consonant ratio
|
| 86 |
+
consonants = sum(1 for c in word if c not in 'aeiou')
|
| 87 |
+
vowels = len(word) - consonants
|
| 88 |
+
# Product names usually have balanced or consonant-heavy distribution
|
| 89 |
+
# Turkish question words are often vowel-heavy
|
| 90 |
+
if consonants >= vowels * 0.5: # At least 1 consonant per 2 vowels
|
| 91 |
+
product_words.append(word)
|
| 92 |
|
| 93 |
print(f"DEBUG - Searching: {' '.join(product_words)}, Size: {size}")
|
| 94 |
|