kingking111009 committed on
Commit
be9504b
·
1 Parent(s): b079cdb

Remove all fallback data and add rating-based recipe filtering

Browse files

- Remove all mock/fallback recipe data
- Add proper rating filter using RAW_interactions.csv
- Only return recipes with rating >= 4.0 and >= 2 reviews
- Service fails cleanly if database cannot be loaded
- No fake data - real database or nothing

🤖 Generated with [Claude Code](https://claude.ai/code)

Files changed (1) hide show
  1. app.py +58 -63
app.py CHANGED
@@ -34,6 +34,7 @@ app.add_middleware(
34
  tokenizer = None
35
  model = None
36
  recipes_df = None
 
37
  vectorizer = None
38
  recipe_vectors = None
39
  device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -77,9 +78,49 @@ def safe_eval_list(x):
77
  return [item.strip() for item in x.split(',') if item.strip()]
78
  return []
79
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  def load_recipes():
81
- """Load and process the RAW_recipes.csv file from Hugging Face dataset"""
82
- global recipes_df, vectorizer, recipe_vectors
83
 
84
  try:
85
  # Try to load from Hugging Face dataset directly
@@ -115,6 +156,15 @@ def load_recipes():
115
 
116
  df = pd.read_csv(local_csv)
117
  print(f"βœ… Successfully downloaded and loaded {len(df)} recipes from CSV!")
 
 
 
 
 
 
 
 
 
118
  except Exception as hf_error:
119
  print(f"⚠️ Both Hugging Face methods failed: {hf_error}")
120
 
@@ -149,6 +199,11 @@ def load_recipes():
149
  missing_cols = [col for col in required_cols if col not in df.columns]
150
  if missing_cols:
151
  raise ValueError(f"Missing required columns: {missing_cols}")
 
 
 
 
 
152
 
153
  # Parse string lists
154
  df['ingredients'] = df['ingredients'].apply(safe_eval_list)
@@ -191,67 +246,7 @@ def load_recipes():
191
  except Exception as e:
192
  print(f"❌ Error loading recipes: {e}")
193
  print(f"πŸ“ Error details: {type(e).__name__}: {str(e)}")
194
-
195
- # Create a more comprehensive fallback dataset
196
- print("πŸ”„ Creating fallback recipe dataset...")
197
- recipes_df = pd.DataFrame({
198
- 'id': [234567, 458976, 123789, 345678, 567890],
199
- 'name': [
200
- '15-Minute Pasta Aglio e Olio',
201
- 'Lemon Herb Grilled Chicken',
202
- 'Rainbow Buddha Bowl',
203
- 'Mediterranean Quinoa Salad',
204
- 'Classic Caesar Salad'
205
- ],
206
- 'minutes': [15, 25, 30, 20, 10],
207
- 'ingredients': [
208
- ['1 lb spaghetti', '6 cloves garlic (sliced)', '1/2 cup olive oil', '1/4 cup fresh parsley', 'red pepper flakes'],
209
- ['4 chicken breasts', '2 lemons (juiced)', '2 tbsp olive oil', '2 tsp dried herbs', 'salt and pepper'],
210
- ['1 cup quinoa', '2 cups mixed vegetables', '3 tbsp tahini', '1 lemon (juiced)', '2 tbsp olive oil'],
211
- ['2 cups cooked quinoa', '1 cup cherry tomatoes', '1 cucumber (diced)', '1/2 cup olives', '3 tbsp olive oil'],
212
- ['1 large romaine lettuce', '1/2 cup parmesan cheese', '1/4 cup caesar dressing', '1/2 cup croutons', 'black pepper']
213
- ],
214
- 'steps': [
215
- ['Cook pasta until al dente', 'Heat oil and sautΓ© garlic until golden', 'Toss pasta with oil and garlic', 'Add parsley and pepper flakes'],
216
- ['Marinate chicken in lemon juice and herbs for 30 minutes', 'Heat grill to medium-high heat', 'Grill chicken 6-8 minutes per side', 'Rest for 5 minutes before serving'],
217
- ['Cook quinoa according to package directions', 'Roast vegetables at 400Β°F for 25 minutes', 'Whisk tahini with lemon juice', 'Assemble bowl and drizzle with dressing'],
218
- ['Cool cooked quinoa completely', 'Dice all vegetables', 'Combine quinoa and vegetables', 'Dress with olive oil and lemon'],
219
- ['Wash and chop romaine lettuce', 'Toss with caesar dressing', 'Top with parmesan and croutons', 'Season with black pepper']
220
- ],
221
- 'tags': [['quick', 'italian', 'pasta'], ['healthy', 'protein', 'grilled'], ['vegetarian', 'healthy', 'bowl'], ['vegetarian', 'mediterranean', 'salad'], ['salad', 'classic', 'vegetarian']],
222
- 'nutrition': [[], [], [], [], []],
223
- 'description': [
224
- 'A classic Italian dish that\'s simple yet delicious.',
225
- 'Fresh and flavorful grilled chicken with herbs and bright lemon flavor.',
226
- 'A nutritious and colorful bowl packed with healthy ingredients.',
227
- 'A protein-rich salad with fresh vegetables and herbs.',
228
- 'A classic caesar salad with crisp romaine and parmesan.'
229
- ]
230
- })
231
-
232
- # Process the fallback dataset the same way
233
- recipes_df['ingredients_text'] = recipes_df['ingredients'].apply(lambda x: ' '.join(x).lower())
234
- recipes_df['steps_text'] = recipes_df['steps'].apply(lambda x: ' '.join(x).lower())
235
- recipes_df['tags_text'] = recipes_df['tags'].apply(lambda x: ' '.join(x).lower())
236
- recipes_df['search_text'] = (
237
- recipes_df['name'].str.lower() + ' ' +
238
- recipes_df['ingredients_text'] + ' ' +
239
- recipes_df['tags_text'] + ' ' +
240
- recipes_df['description'].fillna('').str.lower()
241
- )
242
-
243
- # Create simple vectorizer for fallback
244
- print("πŸ” Building fallback search index...")
245
- vectorizer = TfidfVectorizer(
246
- max_features=1000,
247
- stop_words='english',
248
- ngram_range=(1, 2),
249
- min_df=1
250
- )
251
- recipe_vectors = vectorizer.fit_transform(recipes_df['search_text'])
252
-
253
- print(f"βœ… Fallback dataset ready with {len(recipes_df)} recipes!")
254
- return # Exit early for fallback dataset
255
 
256
  @torch.inference_mode()
257
  def extract_query_features_with_gpt2(query_text, preferences="", max_minutes=30):
 
34
  tokenizer = None
35
  model = None
36
  recipes_df = None
37
+ interactions_df = None
38
  vectorizer = None
39
  recipe_vectors = None
40
  device = "cuda" if torch.cuda.is_available() else "cpu"
 
78
  return [item.strip() for item in x.split(',') if item.strip()]
79
  return []
80
 
81
def filter_by_ratings(recipes_df, interactions_df, min_rating=4.0, min_reviews=2):
    """Filter recipes down to those the community rated well.

    Args:
        recipes_df: DataFrame of recipes; must contain an 'id' column.
        interactions_df: DataFrame of user interactions; must contain
            'recipe_id' and 'rating' columns (one row per rating).
        min_rating: minimum average rating a recipe must reach (default 4.0).
        min_reviews: minimum number of ratings a recipe must have (default 2).

    Returns:
        A new DataFrame with only the qualifying recipes, plus two extra
        columns: 'avg_rating' (rounded to 1 decimal) and 'rating_count'.
        Recipes that have no interactions at all are dropped (inner join).

    Raises:
        Exception: if the aggregation or merge fails (e.g. a missing
            column); the original error is chained as the cause.
    """
    try:
        print(f"πŸ“Š Processing {len(interactions_df)} interactions for rating filter...")

        # Per-recipe average rating and number of ratings. Named aggregation
        # keeps the columns flat (no MultiIndex to rename afterwards).
        # NOTE: the old per-group 'review'-length lambda was removed — its
        # result was never used by the filter or returned to the caller,
        # and it was by far the most expensive part of this groupby.
        recipe_stats = (
            interactions_df.groupby('recipe_id')['rating']
            .agg(avg_rating='mean', rating_count='count')
            .reset_index()
        )

        # Keep only recipes that are both well-rated and rated often enough.
        high_quality = recipe_stats[
            (recipe_stats['avg_rating'] >= min_rating) &
            (recipe_stats['rating_count'] >= min_reviews)
        ]

        print(f"πŸ† Found {len(high_quality)} recipes with rating >= {min_rating} and >= {min_reviews} reviews")

        # Inner join: recipes without qualifying stats are discarded.
        filtered_recipes = recipes_df.merge(
            high_quality[['recipe_id', 'avg_rating', 'rating_count']],
            left_on='id',
            right_on='recipe_id',
            how='inner'
        )

        # Present a tidy one-decimal rating to downstream consumers.
        filtered_recipes['avg_rating'] = filtered_recipes['avg_rating'].round(1)

        print(f"βœ… Quality filter complete: {len(filtered_recipes)} highly-rated recipes")
        return filtered_recipes

    except Exception as e:
        print(f"⚠️ Rating filter failed: {e}")
        # Chain the cause so the full original traceback survives the re-raise.
        raise Exception(f"Failed to apply rating filter: {e}") from e
121
  def load_recipes():
122
+ """Load and process both RAW_recipes.csv and RAW_interactions.csv with rating filtering"""
123
+ global recipes_df, interactions_df, vectorizer, recipe_vectors
124
 
125
  try:
126
  # Try to load from Hugging Face dataset directly
 
156
 
157
  df = pd.read_csv(local_csv)
158
  print(f"βœ… Successfully downloaded and loaded {len(df)} recipes from CSV!")
159
+
160
+ # Also download interactions CSV for rating filtering
161
+ interactions_url = "https://huggingface.co/datasets/nutrientartcd/recipe-dataset/resolve/main/RAW_interactions.csv"
162
+ local_interactions = "/tmp/RAW_interactions_downloaded.csv"
163
+
164
+ print("πŸ“Š Downloading interactions data for rating filtering...")
165
+ urllib.request.urlretrieve(interactions_url, local_interactions)
166
+ interactions_df = pd.read_csv(local_interactions)
167
+ print(f"βœ… Loaded {len(interactions_df)} interactions for rating filtering!")
168
  except Exception as hf_error:
169
  print(f"⚠️ Both Hugging Face methods failed: {hf_error}")
170
 
 
199
  missing_cols = [col for col in required_cols if col not in df.columns]
200
  if missing_cols:
201
  raise ValueError(f"Missing required columns: {missing_cols}")
202
+
203
+ # Filter recipes based on ratings from interactions
204
+ if interactions_df is not None:
205
+ df = filter_by_ratings(df, interactions_df)
206
+ print(f"πŸ“ˆ After rating filter: {len(df)} high-quality recipes remaining")
207
 
208
  # Parse string lists
209
  df['ingredients'] = df['ingredients'].apply(safe_eval_list)
 
246
  except Exception as e:
247
  print(f"❌ Error loading recipes: {e}")
248
  print(f"πŸ“ Error details: {type(e).__name__}: {str(e)}")
249
+ raise Exception(f"Failed to load recipe database: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
250
 
251
  @torch.inference_mode()
252
  def extract_query_features_with_gpt2(query_text, preferences="", max_minutes=30):