Spaces: Runtime error
yusenthebot committed
Commit · 81e637f
Parent(s): d78eb74
Initial deployment
Browse files — This view is limited to 50 files because it contains too many changes.
See raw diff
- .gradio/certificate.pem +31 -0
- README.md +152 -12
- app.py +592 -0
- frige_detect/__pycache__/detect.cpython-313.pyc +0 -0
- frige_detect/annotated_image.jpg +0 -0
- frige_detect/demo/t1.jpg +0 -0
- frige_detect/demo/t2.jpg +0 -0
- frige_detect/demo/t3.jpg +0 -0
- frige_detect/demo/t4.jpg +0 -0
- frige_detect/detect.py +208 -0
- frige_detect/recipe_input.json +86 -0
- frige_detect/roboflow_credentials.txt +4 -0
- recipe_recommendation/__init__.py +0 -0
- recipe_recommendation/__pycache__/__init__.cpython-313.pyc +0 -0
- recipe_recommendation/__pycache__/main.cpython-313.pyc +0 -0
- recipe_recommendation/data/ingredient_map.data +0 -0
- recipe_recommendation/main.py +652 -0
- recipe_recommendation/readme.txt +142 -0
- recipe_recommendation/readme_cn.txt +92 -0
- recipe_recommendation/src/__init__.py +0 -0
- recipe_recommendation/src/__pycache__/__init__.cpython-313.pyc +0 -0
- recipe_recommendation/src/__pycache__/candidate.cpython-313.pyc +0 -0
- recipe_recommendation/src/__pycache__/coldstart.cpython-313.pyc +0 -0
- recipe_recommendation/src/__pycache__/embedding.cpython-313.pyc +0 -0
- recipe_recommendation/src/__pycache__/feature.cpython-313.pyc +0 -0
- recipe_recommendation/src/__pycache__/highlight.cpython-313.pyc +0 -0
- recipe_recommendation/src/__pycache__/io.cpython-313.pyc +0 -0
- recipe_recommendation/src/__pycache__/trainmodel.cpython-313.pyc +0 -0
- recipe_recommendation/src/candidate.py +365 -0
- recipe_recommendation/src/coldstart.py +279 -0
- recipe_recommendation/src/embedding.py +100 -0
- recipe_recommendation/src/feature.py +176 -0
- recipe_recommendation/src/highlight.py +91 -0
- recipe_recommendation/src/io.py +37 -0
- recipe_recommendation/src/trainmodel.py +237 -0
- recipe_recommendation/user_data/demo_user_1/user_profile.json +28 -0
- recipe_recommendation/user_data/user_0/feature_order.json +22 -0
- recipe_recommendation/user_data/user_0/feedback.csv +2 -0
- recipe_recommendation/user_data/user_0/qid.txt +1 -0
- recipe_recommendation/user_data/user_0/ranker.pkl +3 -0
- recipe_recommendation/user_data/user_0/user_features_rank.csv +0 -0
- recipe_recommendation/user_data/user_0/user_profile.json +26 -0
- recipe_recommendation/user_data/user_1/feature_order.json +22 -0
- recipe_recommendation/user_data/user_1/feedback.csv +3 -0
- recipe_recommendation/user_data/user_1/qid.txt +1 -0
- recipe_recommendation/user_data/user_1/ranker.pkl +3 -0
- recipe_recommendation/user_data/user_1/user_features_rank.csv +0 -0
- recipe_recommendation/user_data/user_1/user_profile.json +26 -0
- recipe_recommendation/user_data/user_2/feature_order.json +22 -0
- recipe_recommendation/user_data/user_2/feedback.csv +2 -0
.gradio/certificate.pem
ADDED
@@ -0,0 +1,31 @@
+-----BEGIN CERTIFICATE-----
+MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
+TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
+cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
+WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
+ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
+MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
+h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
+0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
+A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
+T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
+B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
+B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
+KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
+OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
+jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
+qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
+rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
+HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
+hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
+ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
+3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
+NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
+ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
+TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
+jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
+oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
+4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
+mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
+emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
+-----END CERTIFICATE-----
README.md
CHANGED
@@ -1,12 +1,152 @@
-
-
-
-
-
-
-
-
-
-
-
-
+# Smart Fridge Recipe Assistant
+
+The Smart Fridge Recipe Assistant combines Roboflow-powered ingredient detection with a multi-stage recipe recommendation engine. Upload a photo of your fridge and instantly receive recipe ideas that respect your dietary preferences, nutritional goals, and ingredient availability.
+
+
+
+## Features
+
+- **Visual ingredient detection** – Uses a Roboflow YOLO model to detect fridge items, annotate the photo, and build a structured ingredient payload.
+- **Robust recipe ranking pipeline** – Performs coarse ranking, ML reranking, and clustering-based diversification using pretrained user profiles.
+- **Personalized dietary controls** – Configure vegetarian style, allergies, preferred cuisines, macro ranges, and cooking time caps directly in the UI.
+- **Interactive feedback loop** – Record positive feedback for recommended recipes to continuously refine personal models.
+- **One-click examples** – Try the demo instantly with bundled sample fridge photos.
+
+## Project structure
+
+```
+smartFridge/
+├── app.py                     # Gradio user interface
+├── frige_detect/              # Roboflow detector & demo assets
+│   ├── detect.py
+│   ├── demo/
+│   └── roboflow_credentials.txt
+├── recipe_recommendation/     # Recommendation engine
+│   ├── main.py
+│   ├── src/
+│   └── user_data/
+├── requirements.txt
+└── README.md
+```
+
+## Installation
+
+1. Create a new Python environment (recommended).
+2. Install dependencies:
+
+```bash
+pip install -r requirements.txt
+```
+
+The Roboflow API key and project information used by the detector are stored in `frige_detect/roboflow_credentials.txt` and loaded automatically; no manual input is required.
+
+```markdown
+## Running the app locally
+
+```bash
+python app.py
+```
+
+This command launches a Gradio web interface with share link enabled. In the browser you can:
+
+### Core Features
+
+**1. Quick Start with Examples**
+- Select from predefined user profiles (user_1, user_2, user_3) with different dietary preferences
+- Choose from example fridge images (t1.jpg, t2.jpg, t3.jpg)
+- Mix and match any profile with any image for testing
+
+**2. Custom User Profiles**
+- Create new user profiles by entering a custom User ID
+- Configure comprehensive dietary preferences:
+  - **Vegetarian type**: flexible, flexible_vegetarian, ovo_vegetarian, lacto_vegetarian, vegan, non_vegetarian
+  - **Allergies**: comma-separated list (e.g., "peanut, shrimp")
+  - **Region preferences**: comma-separated (e.g., "Asia, Europe")
+  - **Nutritional goals**:
+    - Calorie range (min/max sliders from 0-4000)
+    - Protein range (min/max sliders from 0-250g)
+  - **Ingredient preferences**:
+    - Preferred main ingredients (e.g., "chicken, tofu")
+    - Disliked main ingredients (e.g., "lamb, beef")
+  - **Cooking time limit**: maximum cooking time in minutes (0-180)
+
+**3. Smart Fridge Detection & Recipe Recommendation**
+- Upload your own fridge photo or use example images
+- Click **"Analyze fridge & recommend recipes"**
+- The system will:
+  - Detect ingredients using the Roboflow computer vision model
+  - Map detected items to parent ingredient categories
+  - Filter recipes based on your dietary restrictions, nutrition goals, and disliked ingredients
+  - Score and rank recipes using ML-based personalization
+  - Apply region preference boosting and ingredient matching
+  - Diversify results using KMeans clustering to ensure variety
+
+**4. Automatic Profile Management**
+- User profiles are **automatically saved/updated** every time you click "Analyze"
+- No manual save required - just modify preferences and run
+- Feedback count is preserved when updating existing profiles
+- All profiles stored under `recipe_recommendation/user_data/<user_id>/`
+
+**5. Feedback System**
+- Review the top 5 recommended recipes with detailed information:
+  - Recipe name and match score
+  - Region and cuisine type
+  - Nutritional information (calories, protein)
+  - Main, staple, and other ingredients used
+- Select your favorite recipe from the dropdown
+- Press **"Save feedback"** to log positive feedback
+- Feedback is used to retrain personalized ranking models (every 20 feedback entries)
+
+### How the Recommendation Pipeline Works
+
+1. **Detection**: Roboflow model identifies ingredients in your fridge photo
+2. **Mapping**: Detected items are mapped to parent categories (e.g., "chicken breast" → "chicken")
+3. **Hard Filtering**:
+   - Removes recipes violating dietary restrictions (vegan/vegetarian)
+   - Filters out recipes outside your calorie/protein ranges
+   - Eliminates recipes containing disliked main ingredients
+4. **Coarse Ranking**: Fast ingredient matching across 20,000+ candidates
+5. **ML Reranking**: Personalized ranking using your trained model (or similar user's model)
+6. **Diversification**: KMeans clustering ensures variety in final recommendations
+7. **Top-K Selection**: Returns the best 5 recipes tailored to your preferences
+
+All user profiles, feedback files, trained models, and feature rankings are stored under `recipe_recommendation/user_data/<user_id>/`.
+
+## Dataset & Models
+
+### Computer Vision Model
+- **Fridge ingredient detection**: [Roboflow Nutrition Object Detection](https://universe.roboflow.com/ie-wqegj/nutrition-object-detection)
+- Pre-trained model for detecting common food items in refrigerator images
+- Provides bounding boxes and confidence scores for detected ingredients
+- Credentials stored in `frige_detect/roboflow_credentials.txt`
+
+### Recipe Dataset
+- **Recipe database**: Fetched from Hugging Face dataset [`Iris314/recipe-cleaned`](https://huggingface.co/datasets/Iris314/recipe-cleaned)
+- **Ingredient mappings**: Hierarchical mapping from specific items to parent categories
+- Both are automatically downloaded on first run and cached locally
+
+### Ranking Models
+- User-specific ranking models are automatically:
+  - Bootstrapped using cold-start features for new users
+  - Copied from similar users (based on profile embedding similarity)
+  - Retrained every 20 feedback entries to improve personalization
+- Models stored per user at `recipe_recommendation/user_data/<user_id>/ranker.pkl`
+
+## Deploying to Hugging Face Spaces
+
+To deploy this application to Hugging Face Spaces:
+
+1. Create a new Space on Hugging Face with Gradio SDK
+2. Upload this repository to the Space
+3. Ensure `app.py` is set as the main application file
+4. The Space will automatically run `python app.py` on startup
+5. No additional environment variables or secrets required (Roboflow credentials are bundled)
+
+The deployed app will have the same functionality as the local version, including persistent user profiles and feedback storage.
+
+## License
+
+This project bundles third-party datasets and models subject to their respective licenses:
+- Roboflow Nutrition Object Detection model: Subject to [Roboflow Terms of Service](https://roboflow.com/terms)
+- Recipe dataset from Hugging Face: Check the [`Iris314/recipe-cleaned`](https://huggingface.co/datasets/Iris314/recipe-cleaned) dataset page for licensing details
+```
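The pipeline the README walks through corresponds to two calls added in this commit: `detect_and_generate` in `frige_detect/detect.py` and `recommend_recipes` in `recipe_recommendation/main.py`. Below is a minimal headless sketch (not part of the committed files) of wiring them together outside the Gradio UI; it assumes the bundled demo image, credentials file, and the `user_1` profile shipped under `user_data/`.

```python
# Hedged sketch: run detection + recommendation without the Gradio UI.
# Assumes the repository layout added in this commit (demo images, credentials,
# and the user_1 profile) and network access to Roboflow / Hugging Face.
from frige_detect.detect import detect_and_generate, load_roboflow_credentials
from recipe_recommendation.main import load_recipes, recommend_recipes

creds = load_roboflow_credentials("frige_detect/roboflow_credentials.txt")
recipes_df = load_recipes()  # downloads and caches recipes.csv on first run

# Step 1: detection -> structured ingredient payload (same defaults as app.py)
result = detect_and_generate(
    image_path="frige_detect/demo/t1.jpg",
    credentials=creds,
    conf_threshold=0.4,
    overlap_threshold=0.3,
    conf_split=0.7,
)

# Steps 2-7: mapping, hard filtering, coarse ranking, ML reranking, diversification
top5, parents, high_conf, low_conf = recommend_recipes(
    result["recipe_json"], "user_1", recipes_df, topk=5
)
print("mapped parents:", parents)
print(top5[["name"]].head())
```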
app.py
ADDED
@@ -0,0 +1,592 @@
+"""Gradio application for the smart fridge detector + recipe recommendation pipeline."""
+
+import json
+import tempfile
+from pathlib import Path
+from typing import List, Tuple, Dict, Any
+
+import cv2
+import gradio as gr
+import numpy as np
+from PIL import Image
+
+from frige_detect.detect import (
+    detect_and_generate,
+    load_roboflow_credentials,
+    RoboflowCredentials,
+)
+from recipe_recommendation.main import (
+    load_recipes,
+    recommend_recipes,
+    save_user_profile,
+    get_feedback,
+    USER_DATA_DIR,
+)
+
+# ---------------------------------------------------------------------------
+# Global resources
+# ---------------------------------------------------------------------------
+CREDENTIALS_PATH = Path("frige_detect/roboflow_credentials.txt")
+ROBOFLOW_CREDENTIALS: RoboflowCredentials = load_roboflow_credentials(str(CREDENTIALS_PATH))
+RECIPES_DF = load_recipes()
+
+# ---------------------------------------------------------------------------
+# Predefined user profiles for examples
+# ---------------------------------------------------------------------------
+EXAMPLE_PROFILES = {
+    "user_1": {
+        "vegetarian_type": "flexible",
+        "allergies": "",
+        "regions": "North America",
+        "calorie_min": 250,
+        "calorie_max": 2000,
+        "protein_min": 50,
+        "protein_max": 160,
+        "preferred_main": "",
+        "disliked_main": "",
+        "cooking_time": 45,
+    },
+    "user_2": {
+        "vegetarian_type": "flexible_vegetarian",
+        "allergies": "shrimp",
+        "regions": "Asia",
+        "calorie_min": 400,
+        "calorie_max": 1500,
+        "protein_min": 40,
+        "protein_max": 120,
+        "preferred_main": "tofu",
+        "disliked_main": "beef",
+        "cooking_time": 60,
+    },
+    "user_3": {
+        "vegetarian_type": "non_vegetarian",
+        "allergies": "",
+        "regions": "Europe",
+        "calorie_min": 500,
+        "calorie_max": 2000,
+        "protein_min": 80,
+        "protein_max": 160,
+        "preferred_main": "beef, chicken",
+        "disliked_main": "",
+        "cooking_time": 45,
+    },
+}
+
+# Predefined example images
+EXAMPLE_IMAGES = [
+    "frige_detect/demo/t1.jpg",
+    "frige_detect/demo/t2.jpg",
+    "frige_detect/demo/t3.jpg",
+]
+
+
+# ---------------------------------------------------------------------------
+# Helper utilities
+# ---------------------------------------------------------------------------
+def parse_csv_list(text: str) -> List[str]:
+    if not text:
+        return []
+    parts = [item.strip() for item in text.split(",") if item.strip()]
+    return parts
+
+
+def ensure_numpy_image(image: Any) -> np.ndarray:
+    """Convert incoming image (PIL or numpy) to RGB numpy array."""
+    if image is None:
+        raise ValueError("Please upload a fridge photo before running detection.")
+    if isinstance(image, np.ndarray):
+        return image
+    if isinstance(image, Image.Image):
+        return np.array(image.convert("RGB"))
+    raise ValueError("Unsupported image format provided.")
+
+
+def write_temp_image(image: np.ndarray) -> str:
+    """Write numpy image to a temporary file and return the path."""
+    temp_dir = Path(tempfile.mkdtemp(prefix="fridge_upload_"))
+    temp_path = temp_dir / "upload.jpg"
+    bgr_image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+    cv2.imwrite(str(temp_path), bgr_image)
+    return str(temp_path)
+
+
+def build_user_profile(
+    user_id: str,
+    vegetarian_type: str,
+    allergies: str,
+    regions: str,
+    calorie_range: Tuple[float, float],
+    protein_range: Tuple[float, float],
+    preferred_main: str,
+    disliked_main: str,
+    cooking_time: float,
+) -> Dict[str, Any]:
+    """
+    Build and save user profile. This function ALWAYS creates or overwrites the profile
+    with the current input values, enabling users to modify preferences on-the-fly.
+    """
+    user_id = user_id.strip()
+    if not user_id:
+        raise ValueError("User ID cannot be empty.")
+
+    profile_dir = USER_DATA_DIR / user_id
+    profile_path = profile_dir / "user_profile.json"
+
+    # Preserve feedback count if profile exists
+    num_feedback = 0
+    if profile_path.exists():
+        try:
+            existing = json.loads(profile_path.read_text(encoding="utf-8"))
+            num_feedback = existing.get("num_feedback", 0)
+        except Exception:
+            pass
+
+    profile = {
+        "user_id": user_id,
+        "num_feedback": num_feedback,
+        "diet": {"vegetarian_type": vegetarian_type},
+        "allergies": parse_csv_list(allergies),
+        "region_preference": parse_csv_list(regions),
+        "nutritional_goals": {
+            "calories": {"min": int(calorie_range[0]), "max": int(calorie_range[1])},
+            "protein": {"min": int(protein_range[0]), "max": int(protein_range[1])},
+        },
+        "other_preferences": {
+            "preferred_main": parse_csv_list(preferred_main),
+            "disliked_main": parse_csv_list(disliked_main),
+            "cooking_time_max": int(cooking_time) if cooking_time else None,
+        },
+    }
+
+    # Always save the profile (create new or overwrite existing)
+    save_user_profile(user_id, profile)
+    print(f"[app] Profile saved/updated for user '{user_id}'")
+
+    return profile
+
+
+def summarize_ingredients(
+    user_parents: List[str],
+    high_conf: List[str],
+    low_conf: List[str],
+) -> str:
+    lines = ["### Ingredient Mapping"]
+    if user_parents:
+        lines.append("- **Mapped parent ingredients:** " + ", ".join(sorted(user_parents)))
+    else:
+        lines.append("- **Mapped parent ingredients:** none")
+    if high_conf:
+        lines.append("- **High confidence detections:** " + ", ".join(sorted(high_conf)))
+    if low_conf:
+        lines.append("- **Low confidence detections:** " + ", ".join(sorted(set(low_conf))))
+    return "\n".join(lines)
+
+
+def _ensure_iterable(value: Any) -> List[str]:
+    if value is None:
+        return []
+    if isinstance(value, set):
+        return sorted(value)
+    if isinstance(value, list):
+        return value
+    if isinstance(value, str):
+        return [value]
+    return list(value)
+
+
+def render_recommendations(df) -> Tuple[str, List[Dict[str, Any]]]:
+    if df is None or df.empty:
+        return "No recipes matched the current constraints.", []
+
+    lines = ["### Recommended Recipes"]
+    feedback_rows: List[Dict[str, Any]] = []
+
+    for idx, row in df.head(5).iterrows():
+        match_score = row.get("match_score") or row.get("ml_score", 0)
+        scaled = match_score * 100 if match_score is not None else 0
+        name = row.get("name", f"Recipe {idx+1}")
+        lines.append(f"{idx + 1}. **{name}** — score {scaled:.1f}%")
+
+        region = row.get("region")
+        if region and not (isinstance(region, float) and np.isnan(region)):
+            if isinstance(region, (set, list)):
+                region_str = ", ".join(sorted(region))
+            else:
+                region_str = str(region)
+            lines.append(f" - Region: {region_str}")
+
+        cuisine = row.get("cuisine_attr")
+        cuisine_items = _ensure_iterable(cuisine)
+        if cuisine_items:
+            lines.append(f" - Cuisine: {', '.join(cuisine_items)}")
+
+        calories = row.get("calories")
+        protein = row.get("protein")
+        if calories is not None:
+            lines.append(f" - Calories: {calories}")
+        if protein is not None:
+            lines.append(f" - Protein: {protein}")
+
+        for key in ["main_parent", "staple_parent", "other_parent"]:
+            parents = _ensure_iterable(row.get(key))
+            if parents:
+                pretty_key = key.replace("_", " ").title()
+                lines.append(f" - {pretty_key}: {', '.join(parents)}")
+
+        ingredients = row.get("ingredients")
+        if ingredients:
+            if isinstance(ingredients, str):
+                ingredients_list = parse_csv_list(ingredients)
+            else:
+                ingredients_list = list(ingredients)
+            if ingredients_list:
+                lines.append(f" - Ingredients: {', '.join(ingredients_list[:10])}")
+        lines.append("")
+
+        feedback_row = row.to_dict()
+        for key in ["main_parent", "staple_parent", "other_parent", "seasoning_parent", "cuisine_attr", "ingredients"]:
+            value = feedback_row.get(key)
+            if isinstance(value, list):
+                feedback_row[key] = set(value)
+            elif isinstance(value, str):
+                feedback_row[key] = set(parse_csv_list(value))
+        feedback_rows.append(feedback_row)
+
+    return "\n".join(lines).strip(), feedback_rows
+
+
+def load_example_profile(profile_name: str):
+    """Load a predefined user profile configuration."""
+    if profile_name in EXAMPLE_PROFILES:
+        config = EXAMPLE_PROFILES[profile_name]
+        return (
+            profile_name,
+            config["vegetarian_type"],
+            config["allergies"],
+            config["regions"],
+            config["calorie_min"],
+            config["calorie_max"],
+            config["protein_min"],
+            config["protein_max"],
+            config["preferred_main"],
+            config["disliked_main"],
+            config["cooking_time"],
+        )
+    # Default fallback
+    return ("user_custom", "flexible", "", "", 400, 2000, 50, 160, "", "", 45)
+
+
+def load_example_image(image_path: str):
+    """Load an example image."""
+    return image_path
+
+
+def run_pipeline(
+    image,
+    user_id,
+    vegetarian_type,
+    allergies,
+    regions,
+    calorie_min,
+    calorie_max,
+    protein_min,
+    protein_max,
+    preferred_main,
+    disliked_main,
+    cooking_time,
+):
+    """
+    Main pipeline function.
+    This ALWAYS creates/updates the user profile based on current input values,
+    then runs detection and recommendation.
+    """
+    try:
+        rgb_image = ensure_numpy_image(image)
+        upload_path = write_temp_image(rgb_image)
+        temp_dir = Path(tempfile.mkdtemp(prefix="fridge_outputs_"))
+        output_json = temp_dir / "recipe_input.json"
+        output_image = temp_dir / "annotated_image.jpg"
+
+        detection_result = detect_and_generate(
+            image_path=upload_path,
+            credentials=ROBOFLOW_CREDENTIALS,
+            conf_threshold=0.4,
+            overlap_threshold=0.3,
+            conf_split=0.7,
+            output_json=str(output_json),
+            output_image=str(output_image),
+        )
+        Path(upload_path).unlink(missing_ok=True)
+
+        #2: Always create/update user profile with current UI values
+        profile = build_user_profile(
+            user_id,
+            vegetarian_type,
+            allergies,
+            regions,
+            (calorie_min, calorie_max),
+            (protein_min, protein_max),
+            preferred_main,
+            disliked_main,
+            cooking_time,
+        )
+
+
+        import time
+        time.sleep(0.2)
+
+        detection_payload = detection_result["recipe_json"]
+        ml_top, user_parents, high_conf, low_conf = recommend_recipes(
+            detection_payload,
+            user_id,
+            RECIPES_DF,
+            topk=5,
+        )
+
+        ingredient_summary = summarize_ingredients(user_parents, high_conf, low_conf)
+        recommendation_md, feedback_rows = render_recommendations(ml_top)
+
+        dropdown_choices = [
+            f"{idx + 1}. {row.get('name', 'Recipe')}" for idx, row in enumerate(feedback_rows)
+        ]
+
+        status = "" if feedback_rows else "No recipes available for feedback yet."
+
+        # Add success message about profile creation/update
+        profile_status = f"✓ Profile '{user_id}' has been saved/updated with your current preferences."
+
+        return (
+            str(output_image),
+            detection_payload,
+            ingredient_summary,
+            recommendation_md,
+            gr.Dropdown(choices=dropdown_choices, value=None),
+            feedback_rows,
+            profile_status,
+        )
+    except Exception as exc:
+        import traceback
+        error_detail = traceback.format_exc()
+        return (
+            None,
+            None,
+            "",
+            f"⚠️ Error: {exc}\n\nDetails:\n{error_detail}",
+            gr.Dropdown(choices=[], value=None),
+            [],
+            f"⚠️ Error: {exc}",
+        )
+
+
+def record_feedback(selected_recipe: str, user_id: str, feedback_rows: List[Dict[str, Any]]):
+    if not selected_recipe:
+        return "Please select a recipe before submitting feedback."
+    if not user_id:
+        return "Please provide a valid user ID."
+    if not feedback_rows:
+        return "No recommendation data available. Run the pipeline first."
+
+    try:
+        index = int(selected_recipe.split(".")[0]) - 1
+    except (ValueError, IndexError):
+        return "Unable to parse the selected recipe."
+
+    if index < 0 or index >= len(feedback_rows):
+        return "Selected recipe is out of range."
+
+    recipe_row = feedback_rows[index]
+    get_feedback(user_id, recipe_row)
+
+    profile_path = USER_DATA_DIR / user_id / "user_profile.json"
+    if profile_path.exists():
+        data = json.loads(profile_path.read_text(encoding="utf-8"))
+        data["num_feedback"] = data.get("num_feedback", 0) + 1
+        save_user_profile(user_id, data)
+
+    return f"✓ Feedback recorded for {recipe_row.get('name', 'selected recipe')}!"
+
+
+# ---------------------------------------------------------------------------
+# Gradio UI definition
+# ---------------------------------------------------------------------------
+with gr.Blocks(title="Smart Fridge Recipe Assistant", theme=gr.themes.Soft()) as demo:
+    gr.Markdown(
+        """
+        # Smart Fridge Recipe Assistant
+        **How to use:**
+        1. (Optional) Select an example profile and/or image from dropdowns
+        2. Modify any preferences in the form - your profile will be saved automatically when you click Analyze
+        3. Upload or select a fridge image
+        4. Click "Analyze fridge & recommend recipes"
+        """
+    )
+
+    with gr.Row():
+        with gr.Column(scale=1):
+            gr.Markdown("### Quick Start Examples")
+            profile_selector = gr.Dropdown(
+                label="Choose a predefined user profile",
+                choices=list(EXAMPLE_PROFILES.keys()),
+                value=None,
+            )
+
+            image_selector = gr.Dropdown(
+                label="Choose an example fridge image",
+                choices=[f"Image {i+1}: {img}" for i, img in enumerate(EXAMPLE_IMAGES)],
+                value=None,
+            )
+
+            image_input = gr.Image(
+                label="Fridge photo (upload or use example)",
+                type="pil",
+                height=350,
+            )
+
+            detection_json = gr.JSON(label="Detection payload")
+            annotated_output = gr.Image(label="Annotated detection", height=350)
+
+        with gr.Column(scale=1):
+            gr.Markdown("### User Preferences (auto-saved on each run)")
+            user_id_box = gr.Textbox(
+                label="User ID (will create new profile if doesn't exist)",
+                value="user_custom",
+                placeholder="e.g. my_new_profile",
+            )
+            vegetarian_radio = gr.Radio(
+                [
+                    "flexible",
+                    "flexible_vegetarian",
+                    "ovo_vegetarian",
+                    "lacto_vegetarian",
+                    "vegan",
+                    "non_vegetarian",
+                ],
+                label="Vegetarian preference",
+                value="flexible",
+            )
+            allergies_box = gr.Textbox(
+                label="Allergies (comma separated)",
+                placeholder="peanut, shrimp",
+            )
+            regions_box = gr.Textbox(
+                label="Preferred regions (comma separated)",
+                placeholder="Asia, Europe",
+            )
+            calorie_min = gr.Slider(
+                minimum=0,
+                maximum=4000,
+                value=400,
+                label="Minimum Calories",
+                step=50,
+            )
+            calorie_max = gr.Slider(
+                minimum=0,
+                maximum=4000,
+                value=2000,
+                label="Maximum Calories",
+                step=50,
+            )
+            protein_min = gr.Slider(
+                minimum=0,
+                maximum=250,
+                value=50,
+                label="Minimum Protein (g)",
+                step=5,
+            )
+            protein_max = gr.Slider(
+                minimum=0,
+                maximum=250,
+                value=160,
+                label="Maximum Protein (g)",
+                step=5,
+            )
+            preferred_box = gr.Textbox(
+                label="Preferred main ingredients",
+                placeholder="chicken, tofu",
+            )
+            disliked_box = gr.Textbox(
+                label="Disliked main ingredients",
+                placeholder="lamb",
+            )
+            cooking_slider = gr.Slider(
+                minimum=0,
+                maximum=180,
+                value=45,
+                step=5,
+                label="Max cooking time (minutes)",
+            )
+            run_button = gr.Button("Analyze fridge & recommend recipes", variant="primary")
+            ingredient_md = gr.Markdown()
+            recommendation_md = gr.Markdown()
+            feedback_dropdown = gr.Dropdown(label="Select a recipe for positive feedback", choices=[])
+            feedback_button = gr.Button("Save feedback")
+            feedback_status = gr.Markdown()
+            feedback_state = gr.State([])
+
+    # Connect profile selector
+    profile_selector.change(
+        fn=load_example_profile,
+        inputs=[profile_selector],
+        outputs=[
+            user_id_box,
+            vegetarian_radio,
+            allergies_box,
+            regions_box,
+            calorie_min,
+            calorie_max,
+            protein_min,
+            protein_max,
+            preferred_box,
+            disliked_box,
+            cooking_slider,
+        ],
+    )
+
+    # Connect image selector
+    def select_image(choice):
+        if choice:
+            idx = int(choice.split(":")[0].replace("Image ", "")) - 1
+            return EXAMPLE_IMAGES[idx]
+        return None
+
+    image_selector.change(
+        fn=select_image,
+        inputs=[image_selector],
+        outputs=[image_input],
+    )
+
+    run_button.click(
+        fn=run_pipeline,
+        inputs=[
+            image_input,
+            user_id_box,
+            vegetarian_radio,
+            allergies_box,
+            regions_box,
+            calorie_min,
+            calorie_max,
+            protein_min,
+            protein_max,
+            preferred_box,
+            disliked_box,
+            cooking_slider,
+        ],
+        outputs=[
+            annotated_output,
+            detection_json,
+            ingredient_md,
+            recommendation_md,
+            feedback_dropdown,
+            feedback_state,
+            feedback_status,
+        ],
+    )
+
+    feedback_button.click(
+        fn=record_feedback,
+        inputs=[feedback_dropdown, user_id_box, feedback_state],
+        outputs=feedback_status,
+    )
+
+if __name__ == "__main__":
+    demo.launch(share=True)
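The launch block above always opens a public share tunnel, which is useful locally but redundant once the app runs inside a Space. A possible variant (not part of this commit) is sketched below; it assumes the `SPACE_ID` environment variable is present inside a Hugging Face Space, which should be treated as an assumption rather than a documented contract.

```python
# Hedged sketch of an alternative launch guard for app.py.
import os

if __name__ == "__main__":
    if os.getenv("SPACE_ID"):  # assumption: set automatically inside a HF Space
        demo.launch()          # Space serves the app itself; no share tunnel needed
    else:
        demo.launch(share=True)  # local run keeps the public share link
```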
frige_detect/__pycache__/detect.cpython-313.pyc
ADDED
Binary file (8.28 kB)
frige_detect/annotated_image.jpg
ADDED
frige_detect/demo/t1.jpg
ADDED
frige_detect/demo/t2.jpg
ADDED
frige_detect/demo/t3.jpg
ADDED
frige_detect/demo/t4.jpg
ADDED
frige_detect/detect.py
ADDED
@@ -0,0 +1,208 @@
+# -*- coding: utf-8 -*-
+"""
+Detect ingredients using a Roboflow model with preprocessing:
+- Resize images to 640x640 if needed.
+- Perform detection.
+- Classify object sizes via K-Means.
+- Generate JSON and annotated image outputs.
+"""
+
+import json
+import os
+import tempfile
+from dataclasses import dataclass
+
+import cv2
+import numpy as np
+from roboflow import Roboflow
+from sklearn.cluster import KMeans
+import supervision as sv
+
+
+@dataclass
+class RoboflowCredentials:
+    api_key: str
+    project_name: str
+    version: int = 1
+
+
+def load_roboflow_credentials(path: str) -> RoboflowCredentials:
+    """Load Roboflow API credentials from a simple key=value text file."""
+    if not os.path.exists(path):
+        raise FileNotFoundError(
+            f"Roboflow credential file not found: {path}."
+        )
+
+    api_key = None
+    project_name = None
+    version = 1
+
+    with open(path, "r", encoding="utf-8") as f:
+        for line in f:
+            line = line.strip()
+            if not line or line.startswith("#"):
+                continue
+            if "=" not in line:
+                continue
+            key, value = line.split("=", 1)
+            key = key.strip().lower()
+            value = value.strip()
+            if key == "api_key":
+                api_key = value
+            elif key == "project_name":
+                project_name = value
+            elif key == "version":
+                try:
+                    version = int(value)
+                except ValueError:
+                    raise ValueError("Version in credential file must be an integer") from None
+
+    if not api_key or not project_name:
+        raise ValueError(
+            "Credential file must contain api_key and project_name entries."
+        )
+
+    return RoboflowCredentials(api_key=api_key, project_name=project_name, version=version)
+
+def compute_area_ratios(predictions, img_shape):
+    """Compute area ratio (bbox area / image area) for each detection."""
+    img_area = float(img_shape[0] * img_shape[1])
+    ratios = []
+    for pred in predictions:
+        area = pred["width"] * pred["height"]
+        ratios.append(area / img_area)
+    return np.array(ratios).reshape(-1, 1)
+
+def cluster_sizes(area_ratios):
+    """Cluster area ratios into two groups using K-Means and return size labels."""
+    kmeans = KMeans(n_clusters=2, init="k-means++", random_state=0)
+    labels = kmeans.fit_predict(area_ratios)
+    centroids = kmeans.cluster_centers_.flatten()
+    large_cluster = np.argmax(centroids)
+    return ["large" if lbl == large_cluster else "small" for lbl in labels]
+
+def detect_and_generate(
+    image_path: str,
+    credentials: RoboflowCredentials,
+    conf_threshold: float = 0.4,
+    overlap_threshold: float = 0.3,
+    conf_split: float = 0.7,
+    output_json: str = "recipe_input.json",
+    output_image: str = "annotated_image.jpg"
+):
+    """
+    Resize image if necessary, run detection, classify sizes via K-Means, and
+    create both JSON output and annotated image.
+
+    Args:
+        image_path (str): Path to the original image.
+        api_key (str): Roboflow API key.
+        project_name (str): Roboflow project name.
+        version (int): Model version.
+        conf_threshold (float): Minimum confidence threshold (0–1).
+        overlap_threshold (float): NMS overlap threshold (0–1).
+        conf_split (float): Threshold for high/low confidence lists.
+        output_json (str): Output JSON filename.
+        output_image (str): Output annotated image filename.
+
+    Returns:
+        dict: Recipe input JSON structure.
+    """
+    # Load original image
+    original_img = cv2.imread(image_path)
+    if original_img is None:
+        raise FileNotFoundError(f"Image not found: {image_path}")
+
+    height, width = original_img.shape[:2]
+
+    # Preprocess: resize to 640x640 if needed, and save to a temp file
+    if height != 640 or width != 640:
+        resized_img = cv2.resize(original_img, (640, 640))
+        # create temporary file via mkstemp; close fd to avoid locking
+        fd, tmp_path = tempfile.mkstemp(suffix=".jpg")
+        os.close(fd)
+        cv2.imwrite(tmp_path, resized_img)
+        detection_path = tmp_path
+        img_for_annotation = resized_img
+    else:
+        detection_path = image_path
+        img_for_annotation = original_img
+
+    # Initialize Roboflow model
+    rf = Roboflow(api_key=credentials.api_key)
+    model = rf.workspace().project(credentials.project_name).version(credentials.version).model
+
+    # Run prediction
+    response = model.predict(
+        detection_path,
+        confidence=int(conf_threshold * 100),
+        overlap=int(overlap_threshold * 100)
+    ).json()
+    predictions = response["predictions"]
+
+    # Classify sizes using K-Means
+    area_ratios = compute_area_ratios(predictions, img_for_annotation.shape)
+    size_labels = cluster_sizes(area_ratios)
+
+    # Build JSON structure
+    ingredients = []
+    high_conf = []
+    low_conf = []
+    for pred, size_label in zip(predictions, size_labels):
+        name = pred["class"]
+        conf = pred["confidence"]
+        ingredients.append({
+            "name": name,
+            "quantity": size_label,
+            "confidence": round(conf, 2)
+        })
+        if conf >= conf_split:
+            high_conf.append(name)
+        else:
+            low_conf.append(name)
+
+    recipe_json = {
+        "ingredients": ingredients,
+        "high_confidence_ingredients": high_conf,
+        "low_confidence_ingredients": low_conf
+    }
+
+    # Write JSON to file
+    with open(output_json, "w", encoding="utf-8") as jf:
+        json.dump(recipe_json, jf, indent=4)
+
+    # Annotate image with bounding boxes and confidence labels
+    detections = sv.Detections.from_inference(response)
+    label_annotator = sv.LabelAnnotator()
+    box_annotator = sv.BoxAnnotator()
+
+    labels_for_annotation = [
+        f"{pred['class']} ({pred['confidence']:.2f})" for pred in predictions
+    ]
+
+    annotated_img = box_annotator.annotate(
+        scene=img_for_annotation.copy(),
+        detections=detections
+    )
+    annotated_img = label_annotator.annotate(
+        scene=annotated_img,
+        detections=detections,
+        labels=labels_for_annotation
+    )
+
+    cv2.imwrite(output_image, annotated_img)
+
+    # Display annotated image (optional, for notebooks)
+    # Clean up temporary file
+    if height != 640 or width != 640:
+        try:
+            os.remove(tmp_path)
+        except PermissionError:
+            # If still locked on Windows, delay deletion or log a warning
+            pass
+
+    return {
+        "recipe_json": recipe_json,
+        "output_json_path": output_json,
+        "annotated_image_path": output_image,
+    }
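The size-labelling step in `detect.py` clusters bounding-box/image area ratios into two K-Means groups and calls the cluster with the larger centroid "large". A small offline check (not part of the commit) of that behaviour, using synthetic predictions on an assumed 640x640 image, could look like this:

```python
# Hedged sketch: exercise compute_area_ratios + cluster_sizes on fake detections.
from frige_detect.detect import compute_area_ratios, cluster_sizes

fake_predictions = [
    {"width": 300, "height": 280},  # large item
    {"width": 60, "height": 50},    # small item
    {"width": 320, "height": 250},  # large item
    {"width": 70, "height": 45},    # small item
]

# img_shape mimics a 640x640 BGR image as produced by the resize step
ratios = compute_area_ratios(fake_predictions, (640, 640, 3))
print(cluster_sizes(ratios))  # expected: ['large', 'small', 'large', 'small']
```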
frige_detect/recipe_input.json
ADDED
@@ -0,0 +1,86 @@
+{
+    "ingredients": [
+        {
+            "name": "sugar",
+            "quantity": "large",
+            "confidence": 0.91
+        },
+        {
+            "name": "chicken",
+            "quantity": "large",
+            "confidence": 0.91
+        },
+        {
+            "name": "milk",
+            "quantity": "large",
+            "confidence": 0.89
+        },
+        {
+            "name": "flour",
+            "quantity": "large",
+            "confidence": 0.88
+        },
+        {
+            "name": "eggs",
+            "quantity": "small",
+            "confidence": 0.88
+        },
+        {
+            "name": "apple",
+            "quantity": "large",
+            "confidence": 0.86
+        },
+        {
+            "name": "corn",
+            "quantity": "small",
+            "confidence": 0.85
+        },
+        {
+            "name": "blueberries",
+            "quantity": "small",
+            "confidence": 0.83
+        },
+        {
+            "name": "chicken_breast",
+            "quantity": "large",
+            "confidence": 0.82
+        },
+        {
+            "name": "ground_beef",
+            "quantity": "large",
+            "confidence": 0.81
+        },
+        {
+            "name": "beef",
+            "quantity": "large",
+            "confidence": 0.77
+        },
+        {
+            "name": "carrot",
+            "quantity": "large",
+            "confidence": 0.75
+        },
+        {
+            "name": "bread",
+            "quantity": "large",
+            "confidence": 0.51
+        }
+    ],
+    "high_confidence_ingredients": [
+        "sugar",
+        "chicken",
+        "milk",
+        "flour",
+        "eggs",
+        "apple",
+        "corn",
+        "blueberries",
+        "chicken_breast",
+        "ground_beef",
+        "beef",
+        "carrot"
+    ],
+    "low_confidence_ingredients": [
+        "bread"
+    ]
+}
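This sample payload is exactly what `detect_and_generate` writes: every detection appears in `ingredients`, and the `high/low_confidence_ingredients` lists come from splitting on the default `conf_split=0.7`. A short sketch (not part of the commit) showing how a consumer could reproduce that split from the raw list:

```python
# Hedged sketch: re-derive the confidence split from the committed sample payload.
import json

with open("frige_detect/recipe_input.json", encoding="utf-8") as f:
    payload = json.load(f)

high = [item["name"] for item in payload["ingredients"] if item["confidence"] >= 0.7]
low = [item["name"] for item in payload["ingredients"] if item["confidence"] < 0.7]

print(high)  # should match payload["high_confidence_ingredients"]
print(low)   # should match payload["low_confidence_ingredients"]
```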
frige_detect/roboflow_credentials.txt
ADDED
@@ -0,0 +1,4 @@
+# Roboflow credentials used by the app and detector
+api_key=t2nRJrn7ppJIC8RGHdwk
+project_name=nutrition-object-detection
+version=1
recipe_recommendation/__init__.py
ADDED
File without changes
recipe_recommendation/__pycache__/__init__.cpython-313.pyc
ADDED
Binary file (191 Bytes)
recipe_recommendation/__pycache__/main.cpython-313.pyc
ADDED
Binary file (26.8 kB)
recipe_recommendation/data/ingredient_map.data
ADDED
The diff for this file is too large to render.
See raw diff
recipe_recommendation/main.py
ADDED
|
@@ -0,0 +1,652 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# main.py
# -*- coding: utf-8 -*-
"""
Entry point for the new pipeline:
1) I/O init & parsing
2) Load user parents from recipe_input.json via ingredient_map (children -> parent)
3) Ensure cold-start features & trained ranker exist
4) Step 2: Coarse ranking
5) Step 3: ML reranking
6) Pretty print top results
"""

import os
import json
import ast
import pandas as pd
from pathlib import Path
import shutil

from recipe_recommendation.src.io import load_recipes_csv, load_ingredient_map, download_file
from recipe_recommendation.src.coldstart import cold_start_ranker
from recipe_recommendation.src.trainmodel import train_model_ranker
from recipe_recommendation.src.candidate import (
    coarse_rank_candidates,
    ml_generate_candidates,
    hard_filter,
)
from recipe_recommendation.src.highlight import (
    print_candidates,
    diversify_topk_with_min_clusters,
)
from recipe_recommendation.src.feature import build_features, build_cluster_features
from recipe_recommendation.src.embedding import find_most_similar_user


BASE_DIR = Path(__file__).resolve().parent
USER_DATA_DIR = BASE_DIR / "user_data"


def load_recipes() -> pd.DataFrame:
    """
    Load recipes.csv as DataFrame and assign a unique recipe_id to each row.
    This keeps io.py focused on downloading only.
    """
    path = download_file("recipes.csv")
    df = pd.read_csv(path)
    df.reset_index(drop=True, inplace=True)
    df["recipe_id"] = df.index
    return df


# ---------------------------
# Helpers: parsing utilities
# ---------------------------
def parse_list(x):
    """Parse a cell into Python list; tolerant to str/NaN/set."""
    if isinstance(x, list):
        return x
    if x is None or (isinstance(x, float) and pd.isna(x)):
        return []
    if isinstance(x, set):
        return list(x)
    s = str(x).strip()
    if not s:
        return []
    # Try literal eval first
    try:
        v = ast.literal_eval(s)
        if isinstance(v, list):
            return v
        if isinstance(v, set):
            return list(v)
    except Exception:
        pass
    # Fallback: comma-separated
    s = s.strip("[]")
    parts = [t.strip() for t in s.split(",") if t.strip()]
    return parts


def parse_set(x):
    """Parse a cell into Python set via parse_list."""
    return set(parse_list(x))


# -------------------------------------
# Map user CV result -> parent set
# -------------------------------------
def load_user_parents_from_json(json_path, ingredient_map, conf_th=0.8):
    """
    Map raw ingredient names to parent categories using ingredient_map["children"].
    If a name is already a parent in ingredient_map["parents"], keep it.
    Unknown terms are skipped.
    """
    parents_map = ingredient_map.get("parents", {}) or {}
    children_map = ingredient_map.get("children", {}) or {}

    if not os.path.exists(json_path):
        raise FileNotFoundError(f"recipe_input.json not found at: {json_path}")

    with open(json_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    out = []
    hi, lo = [], []
    for ing in data.get("ingredients", []):
        name = (ing.get("name") or "").strip().lower().replace("_", " ")
        conf = float(ing.get("confidence", 0.0))
        parent = None
        if name in children_map:
            # Prefer "parent" field; fall back to "fallback" if present
            parent = children_map[name].get("parent") or children_map[name].get("fallback")
        elif name in parents_map:
            parent = name

        if parent and conf >= conf_th:
            out.append(parent)
            hi.append((name, parent))
        else:
            lo.append(name)

    if hi:
        print("High-confidence ingredients mapped to parents:")
        for child, p in hi:
            print(f"  - {child} → {p}")
    if lo:
        print(f"Ignored (low confidence or no parent found): {sorted(set(lo))}")

    return sorted(set(out))


def normalize_user_profile(profile):
    """Fill missing keys and set defaults to avoid None errors downstream."""
    # Diet
    diet = profile.get("diet", {})
    profile["diet"] = {"vegetarian_type": diet.get("vegetarian_type", "flexible")}

    # Allergies
    if "allergies" not in profile or profile["allergies"] is None:
        profile["allergies"] = []

    # Region
    if "region_preference" not in profile or profile["region_preference"] is None:
        profile["region_preference"] = []

    # Nutritional goals
    if "nutritional_goals" not in profile or profile["nutritional_goals"] is None:
        profile["nutritional_goals"] = {
            "calories": {"min": 0, "max": 9999},
            "protein": {"min": 0, "max": 999}
        }
    else:
        ng = profile["nutritional_goals"]
        ng["calories"] = ng.get("calories", {"min": 0, "max": 9999})
        ng["protein"] = ng.get("protein", {"min": 0, "max": 999})

    # Other preferences
    other = profile.get("other_preferences", {})
    if not other:
        other = {}
    other["preferred_main"] = other.get("preferred_main", [])
    other["disliked_main"] = other.get("disliked_main", [])
    other["cooking_time_max"] = other.get("cooking_time_max", None)
    profile["other_preferences"] = other

    return profile


def is_profile_empty(profile):
    """Return True if the profile has almost no meaningful preferences."""
    if profile.get("diet", {}).get("vegetarian_type") not in [None, "", "flexible"]:
        return False
    if profile.get("allergies"):
        return False
    if profile.get("region_preference"):
        return False

    ng = profile.get("nutritional_goals", {})
    if ng.get("calories") or ng.get("protein"):
        c = ng.get("calories", {})
        p = ng.get("protein", {})
        if c.get("min", 0) > 0 or c.get("max", 0) < 9999:
            return False
        if p.get("min", 0) > 0 or p.get("max", 0) < 999:
            return False

    other = profile.get("other_preferences", {})
    if other.get("preferred_main") or other.get("disliked_main") or other.get("cooking_time_max"):
        return False

    return True


def fill_default_preferences(profile):
    """
    Fill some lightweight, neutral defaults so that hard_filter and cold_start
    can work efficiently even for new users with no explicit preferences.
    """
    profile["diet"]["vegetarian_type"] = "flexible"
    profile["region_preference"] = ["North America", "Europe"]
    profile["nutritional_goals"]["protein"] = {"min": 50, "max": 150}
    profile["nutritional_goals"]["calories"] = {"min": 400, "max": 2000}
    profile["other_preferences"]["cooking_time_max"] = 45
    return profile


def ensure_user_profile(user_id):
    """
    Load user profile JSON, normalize structure, and fill default preferences
    if the profile is empty. This ensures downstream code never breaks on None
    and avoids extremely slow cold start for users with no preferences.
    """
    import os, json

    profile_file = USER_DATA_DIR / user_id / "user_profile.json"
    if not os.path.exists(profile_file):
        raise FileNotFoundError(
            f"Missing profile: {profile_file}. Please create one first."
        )

    # Load profile
    with open(profile_file, "r", encoding="utf-8") as f:
        profile = json.load(f)
    # Normalize structure
    profile = normalize_user_profile(profile)
    # Detect if almost empty
    if is_profile_empty(profile):
        print(f"[profile] User {user_id} has an empty or near-empty profile. Filling defaults...")
        profile = fill_default_preferences(profile)

    return profile


def save_user_profile(user_id, profile):
    profile_path = USER_DATA_DIR / user_id / "user_profile.json"
    profile_path.parent.mkdir(parents=True, exist_ok=True)
    with open(profile_path, "w", encoding="utf-8") as f:
        json.dump(profile, f, indent=2)


def collect_user_feedback(user_id: str, selected_recipe_row: dict, user_profile: dict, qid: int):
    """
    Collect a single feedback sample.
    - Uses build_features() to ensure feature alignment with training
    - Maintains a fixed feature order via feature_order.json
    """
    user_dir = USER_DATA_DIR / user_id
    user_dir.mkdir(parents=True, exist_ok=True)
    feedback_path = user_dir / "feedback.csv"
    feature_order_path = user_dir / "feature_order.json"

    recipe_dict = {
        "main": selected_recipe_row.get("main_parent", set()),
        "staple": selected_recipe_row.get("staple_parent", set()),
        "other": selected_recipe_row.get("other_parent", set()),
        "seasoning": selected_recipe_row.get("seasoning_parent", set()),
        "matched_main": len(selected_recipe_row.get("main_parent", set()) & set(user_profile.get("user_parents", []))),
        "matched_staple": len(selected_recipe_row.get("staple_parent", set()) & set(user_profile.get("user_parents", []))),
        "matched_other": len(selected_recipe_row.get("other_parent", set()) & set(user_profile.get("user_parents", []))),
        "calories": selected_recipe_row.get("calories", 0),
        "protein": selected_recipe_row.get("protein", 0),
        "fat": selected_recipe_row.get("fat", 0),
        "region": selected_recipe_row.get("region", ""),
        "cuisine_attr": selected_recipe_row.get("cuisine_attr", []),
        "ingredients": selected_recipe_row.get("ingredients", []),
        "minutes": selected_recipe_row.get("minutes", None),
    }
    features = build_features(recipe_dict, user_profile)

    if os.path.exists(feature_order_path):
        with open(feature_order_path, "r", encoding="utf-8") as f:
            feature_order = json.load(f)
    else:
        feature_order = list(features.keys())
        with open(feature_order_path, "w", encoding="utf-8") as f:
            json.dump(feature_order, f, indent=2)

    for feat in features.keys():
        if feat not in feature_order:
            feature_order.append(feat)
    with open(feature_order_path, "w", encoding="utf-8") as f:
        json.dump(feature_order, f, indent=2)

    row_data = {feat: features.get(feat, 0) for feat in feature_order}
    row_data["recipe_id"] = selected_recipe_row["recipe_id"]
    row_data["qid"] = qid
    row_data["relevance"] = 5

    new_row_df = pd.DataFrame([row_data])

    if os.path.exists(feedback_path):
        old_df = pd.read_csv(feedback_path)
        for col in new_row_df.columns:
            if col not in old_df.columns:
                old_df[col] = 0
        for col in old_df.columns:
            if col not in new_row_df.columns:
                new_row_df[col] = 0
        df = pd.concat([old_df, new_row_df], ignore_index=True)
    else:
        df = new_row_df
    df.to_csv(feedback_path, index=False)
    print(f"[feedback] Saved user feedback to {feedback_path} ({len(df)} rows total)")


# def ensure_model(user_id):
#     base_dir = USER_DATA_DIR / user_id
#     base_dir.mkdir(parents=True, exist_ok=True)
#     features_rank = base_dir / "user_features_rank.csv"
#     model_file = base_dir / "ranker.pkl"

#     if not os.path.exists(features_rank):
#         print("[main] No cold-start features found; running cold_start_ranker() ...")
#         cold_start_ranker(user_id=user_id)

#     if not os.path.exists(model_file):
#         print("[main] No model found; training ranker with train_model_ranker() ...")
#         train_model_ranker(user_id=user_id)

#     return model_file

def ensure_model(user_id):
    base_dir = USER_DATA_DIR / user_id
    base_dir.mkdir(parents=True, exist_ok=True)
    features_rank = base_dir / "user_features_rank.csv"
    model_file = base_dir / "ranker.pkl"

    if not os.path.exists(features_rank):
        print("[main] No cold-start features found; running cold_start_ranker() ...")
        # pass user_data_dir
        cold_start_ranker(user_id=user_id, user_data_dir=str(USER_DATA_DIR))

    if not os.path.exists(model_file):
        print("[main] No model found; training ranker with train_model_ranker() ...")
        train_model_ranker(user_id=user_id)

    return model_file


def prepare_recipes_df(df: pd.DataFrame) -> pd.DataFrame:
    """
    Normalize key columns to list/set shapes that our candidate/feature modules expect.
    """
    df = df.copy()

    # list-like columns
    for col in ["staple", "main", "seasoning", "other", "ingredients"]:
        if col in df.columns:
            df[col] = df[col].apply(parse_list)

    # set-like columns
    for col in ["staple_parent", "main_parent", "seasoning_parent", "other_parent", "cuisine_attr"]:
        if col in df.columns:
            df[col] = df[col].apply(parse_set)

    # region: allow str or set; if it looks like list/set, cast to set; otherwise keep str
    if "region" in df.columns:
        def _region_norm(x):
            if isinstance(x, (set, list)):
                return set(x)
            try:
                v = ast.literal_eval(str(x))
                if isinstance(v, (set, list)):
                    return set(v)
            except Exception:
                pass
            return str(x) if pd.notna(x) else ""
        df["region"] = df["region"].apply(_region_norm)

    return df


def maybe_retrain_model(user_id):
    profile_path = USER_DATA_DIR / user_id / "user_profile.json"
    if not profile_path.exists():
        return

    profile = json.loads(profile_path.read_text())
    n_fb = profile.get("num_feedback", 0)

    if n_fb > 0 and n_fb % 20 == 0:
        print(f"[main] {n_fb} feedback reached, retraining ranker...")

        model_path = USER_DATA_DIR / user_id / "ranker.pkl"
        if model_path.exists():
            model_path.unlink()

        train_model_ranker(user_id)


def get_next_qid(user_id: str) -> int:
    user_dir = USER_DATA_DIR / user_id
    user_dir.mkdir(parents=True, exist_ok=True)
    qid_path = user_dir / "qid.txt"

    if qid_path.exists():
        qid = int(qid_path.read_text()) + 1
    else:
        qid = 0
    qid_path.write_text(str(qid))
    return qid


def maybe_reuse_model(user_id, threshold=0.85):
    match_uid, sim = find_most_similar_user(user_id, threshold=threshold)
    if match_uid:
        print(f"[model reuse] Reusing {match_uid}'s model for {user_id} (sim={sim:.3f})")
        return match_uid
    return None


def main(user_id="user_1",
         recipe_input_json=None,
         topk=5,
         topn_coarse=20000):
    # 1) I/O init
    maybe_retrain_model(user_id)

    recipes_df = load_recipes()
    ingredient_map = load_ingredient_map()

    # 2) Load user_parents from recipe_input.json (fall back to /data if needed)
    if recipe_input_json is None:
        # prefer project root; then /data
        default_candidates = [
            os.path.join("data", "recipe_input.json"),
            "recipe_input.json",
            "/data/recipe_input.json",
        ]
        recipe_input_json = next((p for p in default_candidates if os.path.exists(p)), default_candidates[-1])

    user_parents = load_user_parents_from_json(recipe_input_json, ingredient_map, conf_th=0.8)

    # 3) Load user profile
    user_profile = ensure_user_profile(user_id)

    # Embedding similarity fallback
    match_uid, sim = find_most_similar_user(user_id, threshold=0.85)
    if match_uid is not None:
        print(f"[main] Using model of similar user '{match_uid}' for '{user_id}' (sim={sim:.3f})")

        src_dir = USER_DATA_DIR / match_uid
        dst_dir = USER_DATA_DIR / user_id
        dst_dir.mkdir(parents=True, exist_ok=True)

        for fname in ["ranker.pkl", "user_features_rank.csv"]:
            src = src_dir / fname
            dst = dst_dir / fname
            if os.path.exists(src) and not os.path.exists(dst):
                shutil.copyfile(src, dst)
                print(f"[embedding] Copied {fname} from {match_uid} to {user_id}")

    # 4) Ensure cold-start features & model
    model_path = ensure_model(user_id)

    # 5) Prepare recipes & coarse rank (Step 2)
    df = prepare_recipes_df(recipes_df)
    recipes_records = df.to_dict(orient="records")

    filtered_records = [r for r in recipes_records if hard_filter(r, user_profile)]
    if not filtered_records:
        print("[main] No recipes after hard dietary filtering.")
        return

    coarse = coarse_rank_candidates(
        recipes=recipes_records,
        user_parents=user_parents,
        user_profile=user_profile,
        top_n=topn_coarse
    )

    if not coarse:
        print("[main] No coarse candidates. Please check user_parents or dataset.")
        return

    # 6) ML reranking (Step 3)
    ml_top = ml_generate_candidates(
        coarse_candidates=coarse,
        user_parents=user_parents,
        user_profile=user_profile,
        model_path=model_path,
        topk=200
    )

    if ml_top is None or len(ml_top) == 0:
        print("[main] No ML candidates returned.")
        return

    # 6.5) KMeans Diversification
    candidates_list = ml_top.to_dict(orient="records")
    X_cluster = build_cluster_features(candidates_list)
    diversified = diversify_topk_with_min_clusters(
        ranked_candidates=candidates_list,
        feature_matrix=X_cluster,
        top_k=topk,
        n_clusters=10,
        min_clusters=3
    )

    ml_top = pd.DataFrame(diversified)

    # 7) Pretty print (reuse print_candidates expecting 'match_score')
    ml_top = ml_top.copy()
    if "match_score" not in ml_top.columns and "ml_score" in ml_top.columns:
        ml_top["match_score"] = ml_top["ml_score"]

    print(f"\nFound {len(ml_top)} candidate recipes:\n")
    print_candidates(ml_top, user_parents, topk=topk)

    # 8) Give feedbacks
    qid = get_next_qid(user_id)
    selected_idx = int(input(f"Select a recipe from 1-{topk}: ")) - 1
    selected_row = ml_top.iloc[selected_idx].to_dict()
    collect_user_feedback(user_id, selected_row, user_profile, qid)


def recommend_recipes(detection_payload, user_id, recipes_df, topk=5):
    """
    Unified recommendation entry for the app.
    Handles user profile loading, ingredient mapping, and embedding fallback internally.
    """
    # 0) Check if retraining is needed (new feedback, updated features)
    maybe_retrain_model(user_id)
    # 1) Ingredient mapping - use existing high/low confidence fields
    ingredient_map = load_ingredient_map()
    ingredients = detection_payload.get("ingredients", [])

    high_conf = detection_payload.get("high_confidence_ingredients", [])
    low_conf = detection_payload.get("low_confidence_ingredients", [])

    # user_parents = []
    # for item in ingredients:
    #     name = item.get("name")
    #     if not name:
    #         continue
    #     parent = ingredient_map.get(name.lower())
    #     if parent:
    #         user_parents.append(parent)

    # user_parents = sorted(set(user_parents))

    parents_map = ingredient_map.get("parents", {}) or {}
    children_map = ingredient_map.get("children", {}) or {}

    user_parents = []
    for item in ingredients:
        name = (item.get("name") or "").strip().lower().replace("_", " ")
        if not name:
            continue

        parent = None
        if name in children_map:
            parent = children_map[name].get("parent") or children_map[name].get("fallback")
        elif name in parents_map:
            parent = name

        if parent:
            user_parents.append(parent)

    user_parents = sorted(set(user_parents))

    high_conf = sorted(set(high_conf))
    low_conf = sorted(set(low_conf))

    # 2) Load user profile internally
    user_profile = ensure_user_profile(user_id)

    # 3) Embedding fallback
    match_uid, sim = find_most_similar_user(user_id, threshold=0.85)
    if match_uid is not None:
        print(f"[embedding] Using model of similar user '{match_uid}' for '{user_id}' (sim={sim:.3f})")
        src_dir = USER_DATA_DIR / match_uid
        dst_dir = USER_DATA_DIR / user_id
        dst_dir.mkdir(parents=True, exist_ok=True)
        for fname in ["ranker.pkl", "user_features_rank.csv"]:
            src = src_dir / fname
            dst = dst_dir / fname
            if os.path.exists(src) and not os.path.exists(dst):
                shutil.copyfile(src, dst)
                print(f"[embedding] Copied {fname} from {match_uid} to {user_id}")

    # 4) Coldstart / model ensure
    model_path = ensure_model(user_id)

    # 5) Coarse rank
    df = prepare_recipes_df(recipes_df)
    recipes_records = df.to_dict(orient="records")
    filtered_records = [r for r in recipes_records if hard_filter(r, user_profile)]
    if not filtered_records:
        return pd.DataFrame(), user_parents, high_conf, low_conf

    coarse = coarse_rank_candidates(
        recipes=recipes_records,
        user_parents=user_parents,
        user_profile=user_profile,
        top_n=20000
    )
    if not coarse:
        return pd.DataFrame(), user_parents, high_conf, low_conf

    # 6) ML rerank
    ml_top = ml_generate_candidates(
        coarse_candidates=coarse,
        user_parents=user_parents,
        user_profile=user_profile,
        model_path=model_path,
        topk=200
    )
    if ml_top is None or len(ml_top) == 0:
        return pd.DataFrame(), user_parents, high_conf, low_conf

    # 7) KMeans diversification
    candidates_list = ml_top.to_dict(orient="records")
    X_cluster = build_cluster_features(candidates_list)
    diversified = diversify_topk_with_min_clusters(
        ranked_candidates=candidates_list,
        feature_matrix=X_cluster,
        top_k=topk,
        n_clusters=10,
        min_clusters=3
    )

    ml_top = pd.DataFrame(diversified)

    return ml_top, user_parents, high_conf, low_conf


def get_feedback(user_id: str, recipe_row: dict, qid: int = None):
    """
    App-friendly feedback collection function.

    Parameters
    ----------
    user_id : str
        The ID of the user submitting feedback.
    recipe_row : dict
        The recipe information dict (e.g., one row from ml_top.to_dict()).
    qid : int, optional
        The query ID for ranking context. If not provided, defaults to 0 or auto increments.
    """
    # 1) Ensure user profile is loaded internally
    user_profile = ensure_user_profile(user_id)

    # 2) If qid is not provided, generate automatically
    if qid is None:
        try:
            qid = get_next_qid(user_id)
        except Exception:
            qid = 0

    # 3) Delegate to existing collect_user_feedback
    collect_user_feedback(user_id, recipe_row, user_profile, qid)

    print(f"[app] Feedback collected for user '{user_id}', qid={qid}, recipe_id={recipe_row.get('id')}")


if __name__ == "__main__":
    main("user_3")
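For reference, this is the payload shape that `recommend_recipes()` and `load_user_parents_from_json()` parse above: an `"ingredients"` list of `{"name", "confidence"}` dicts plus optional high/low-confidence name lists. The ingredient names and confidence values in the sketch below are invented for illustration only; they are not from the real detector.

```python
# Illustrative only: the shape consumed by recommend_recipes() above.
# Names and confidences are made-up example values.
detection_payload = {
    "ingredients": [
        {"name": "chicken_breast", "confidence": 0.93},
        {"name": "broccoli", "confidence": 0.88},
        {"name": "unknown_blob", "confidence": 0.41},
    ],
    "high_confidence_ingredients": ["chicken_breast", "broccoli"],
    "low_confidence_ingredients": ["unknown_blob"],
}
# load_user_parents_from_json() reads the same "ingredients" list from
# recipe_input.json and keeps only entries with confidence >= conf_th
# whose name maps to a parent category in ingredient_map.
```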
recipe_recommendation/readme.txt
ADDED
|
@@ -0,0 +1,142 @@
readme_text = """\
===========================
Recipe Recommendation System
===========================

This project implements a complete recipe recommendation system, including cold start ranking, ML-based reranking, KMeans-based diversification, and user feedback collection.
All functions are fully encapsulated and can be easily called from external applications.

-------------------------------------
1. Main Entry Functions for External Use
-------------------------------------

The three main functions for external usage are:

1) recommend_recipes(detection_payload, user_id, recipes_df, topk=5)
   - Input:
     • detection_payload: dict or JSON object containing detected ingredients.
     • user_id: str, unique user identifier.
     • recipes_df: pandas.DataFrame loaded by `load_recipes()`.
     • topk: int, number of final recipes to return (default = 5).
   - Output:
     • ml_top: pandas.DataFrame of top recommended recipes (with ml_score & metadata).
     • user_parents: list of mapped parent ingredients.
     • high_conf: list of high-confidence ingredient matches.
     • low_conf: list of low-confidence or unmapped ingredients.

   Internally, this function performs:
   - Ingredient mapping from detection payload
   - Embedding fallback (copy model/features from similar user)
   - Cold start feature generation if needed
   - Coarse ranking → ML reranking → KMeans diversification
   - Returns the final diversified top-k recommendations.

2) load_recipes()
   - Input: None
   - Output: pandas.DataFrame of all recipes (automatically downloaded from Hugging Face if not present).
   - This function loads the full recipe dataset into memory.
     If the dataset is not found locally, it will automatically download and cache it under `data/`.

3) get_feedback(user_id, recipe_row, qid=None)
   - Input:
     • user_id: str, unique user identifier.
     • recipe_row: dict, a single recipe row (e.g. one of the top-k recommendations).
     • qid: int, optional query ID. Defaults to auto-generated or 0.
   - Output: None
   - Function:
     • Loads user profile internally
     • Appends the feedback (recipe metadata, user choice) into `user_data/{user_id}/feedback.csv`
     • Does not retrain the model automatically (use `maybe_retrain_model` if needed)

----------------------------------------
2. User Profiles and Pretrained Models
----------------------------------------

The `user_data` folder contains four example users:

- user_0 : Empty profile for testing the system’s ability to bootstrap from zero information.
- user_1 : A user with specific dietary habits.
- user_2 : A user with different dietary preferences.
- user_3 : Similar to user_2, used to test simple embedding-based model reuse.

For each user:
- Cold start features and ML models (`user_features_rank.csv` and `ranker.pkl`) have already been generated.
- You can add new users by creating a new folder under `user_data/` with a profile file `user_profile.json` in the following format:

{
  "user_id": "user_001",
  "num_feedback": 0,
  "diet": {
    "vegetarian_type": "flexible_vegetarian"
  },
  "allergies": ["peanut", "shrimp"],
  "region_preference": ["Asia", "Europe"],
  "nutritional_goals": {
    "calories": { "min": 400, "max": 3000 },
    "protein": { "min": 100, "max": 160 }
  },
  "other_preferences": {
    "preferred_main": ["chicken", "tofu"],
    "disliked_main": ["lamb"],
    "cooking_time_max": 40
  }
}

The cold start process will typically take **15–25 minutes**, depending on your system performance.

----------------------------------------
3. Dataset Download
----------------------------------------

Large recipe and ingredient mapping files are stored on Hugging Face under the account:
→ iris314

These files will be automatically downloaded the first time `load_recipes()` or related functions are called.
No manual setup is required.

----------------------------------------
4. Feedback Loop & Retraining
----------------------------------------

User feedback is saved in `feedback.csv` files under each user's directory.
To trigger retraining after feedback collection, call:

    from trainmodel import maybe_retrain_model
    maybe_retrain_model(user_id)

This checks timestamps between `user_features_rank.csv` and `ranker.pkl` to decide if retraining is needed.

----------------------------------------
5. Cold Start & Embedding Fallback
----------------------------------------

- If a user has no model or features, the system runs a cold start procedure to generate ranking features.
- If a similar user exists (cosine similarity > 0.85), the system copies their model and features to skip retraining.

----------------------------------------
6. Quick Start Example
----------------------------------------

    from main import recommend_recipes, load_recipes, get_feedback

    # 1. Load dataset
    recipes_df = load_recipes()

    # 2. Prepare a fake detection payload
    payload = {"detected_ingredients": ["chicken", "milk", "flour"]}

    # 3. Recommend
    top_recipes, user_parents, high_conf, low_conf = recommend_recipes(payload, "user_1", recipes_df, topk=5)

    # 4. Feedback
    get_feedback("user_1", top_recipes.iloc[0].to_dict())

----------------------------------------
End of README
----------------------------------------
"""

with open("README.txt", "w", encoding="utf-8") as f:
    f.write(readme_text)

"README.txt file created successfully."
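The embedding fallback described in section 5 above relies on `find_most_similar_user()` from `src/embedding.py`, which is not shown in this commit view. The snippet below only illustrates the cosine-similarity test against the 0.85 threshold on two hypothetical profile vectors; the vector values and the helper are assumptions for illustration, not the project's implementation.

```python
import numpy as np

def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    """Cosine similarity between two profile embedding vectors."""
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    return float(a @ b / denom) if denom else 0.0

# Hypothetical profile embeddings for two users (values are made up).
new_user_vec = np.array([0.2, 0.9, 0.1, 0.4])
existing_user_vec = np.array([0.25, 0.85, 0.05, 0.5])

sim = cosine_similarity(new_user_vec, existing_user_vec)
if sim > 0.85:
    # In the real pipeline this is where ranker.pkl and
    # user_features_rank.csv are copied to the new user's folder.
    print(f"Reuse existing model (sim={sim:.3f})")
```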
recipe_recommendation/readme_cn.txt
ADDED
|
@@ -0,0 +1,92 @@
=============================
Recipe Recommendation System
=============================

This project implements a complete recipe recommendation system, including:
- Cold start ranking
- Machine learning (ML) reranking
- KMeans clustering for diversification
- User feedback collection and automatic retraining

All functionality is already encapsulated; external callers only need a few simple interfaces.

----------------------------------------
1. Main External Entry Functions
----------------------------------------

1) recommend_recipes(detection_payload, user_id, recipes_df, topk=5)
   - Input:
     • detection_payload: dict or JSON describing the detected ingredients
     • user_id: str, user ID
     • recipes_df: recipe DataFrame loaded via `load_recipes()`
     • topk: number of recommended recipes to return (default 5)
   - Output:
     • ml_top: recommendation results (DataFrame)
     • user_parents: list of mapped parent ingredients
     • high_conf: high-confidence matches
     • low_conf: low-confidence / unmatched ingredients

   The pipeline covers: ingredient mapping → copying a similar user's model → cold start → coarse ranking → ML reranking → KMeans diversification.

2) load_recipes()
   - Automatically downloads the recipe data from Hugging Face (iris314) into `data/` and returns a DataFrame.

3) get_feedback(user_id, recipe_row, qid=None)
   - Collects user feedback and writes it to `user_data/{user_id}/feedback.csv`
   - user_profile is loaded automatically; qid is assigned automatically if omitted

----------------------------------------
2. User Data
----------------------------------------

`user_data` contains four example users:
- user_0: empty profile, used to test bootstrapping from zero information
- user_1 / user_2: users with different dietary preferences
- user_3: similar to user_2, used to test the embedding-based model reuse feature

Each user directory contains `user_profile.json`, `user_features_rank.csv`, and `ranker.pkl`.
You can add new users; just follow this JSON format:

{
  "user_id": "user_001",
  "num_feedback": 0,
  "diet": {"vegetarian_type": "flexible_vegetarian"},
  "allergies": ["peanut", "shrimp"],
  "region_preference": ["Asia", "Europe"],
  "nutritional_goals": {
    "calories": {"min": 400, "max": 3000},
    "protein": {"min": 100, "max": 160}
  },
  "other_preferences": {
    "preferred_main": ["chicken", "tofu"],
    "disliked_main": ["lamb"],
    "cooking_time_max": 40
  }
}

The cold start process usually takes 15-25 minutes (depending on machine performance).

----------------------------------------
3. Data Download
----------------------------------------

Large files such as the recipe data and the ingredient map are downloaded automatically from Hugging Face (iris314) and cached under `data/`; no manual setup is required.

----------------------------------------
4. Quick Start Example
----------------------------------------

```python
from main import recommend_recipes, load_recipes, get_feedback

# Load recipes
recipes_df = load_recipes()

# Prepare a mock detection payload
payload = {"detected_ingredients": ["chicken", "milk", "flour"]}

# Get recommendations
top_recipes, user_parents, high_conf, low_conf = recommend_recipes(payload, "user_1", recipes_df, topk=5)

# Submit feedback
get_feedback("user_1", top_recipes.iloc[0].to_dict())
```
recipe_recommendation/src/__init__.py
ADDED
File without changes
recipe_recommendation/src/__pycache__/__init__.cpython-313.pyc
ADDED
Binary file (195 Bytes)
recipe_recommendation/src/__pycache__/candidate.cpython-313.pyc
ADDED
Binary file (15 kB)
recipe_recommendation/src/__pycache__/coldstart.cpython-313.pyc
ADDED
Binary file (13.7 kB)
recipe_recommendation/src/__pycache__/embedding.cpython-313.pyc
ADDED
Binary file (5.94 kB)
recipe_recommendation/src/__pycache__/feature.cpython-313.pyc
ADDED
Binary file (8.57 kB)
recipe_recommendation/src/__pycache__/highlight.cpython-313.pyc
ADDED
Binary file (4.47 kB)
recipe_recommendation/src/__pycache__/io.cpython-313.pyc
ADDED
Binary file (2.02 kB)
recipe_recommendation/src/__pycache__/trainmodel.cpython-313.pyc
ADDED
Binary file (10.4 kB)
recipe_recommendation/src/candidate.py
ADDED
|
@@ -0,0 +1,365 @@
| 1 |
+
import pandas as pd
|
| 2 |
+
import numpy as np
|
| 3 |
+
from .feature import build_features
|
| 4 |
+
from .io import load_ingredient_map
|
| 5 |
+
import joblib
|
| 6 |
+
|
| 7 |
+
# Load ingredient map globally to avoid repeated I/O
|
| 8 |
+
INGREDIENT_MAP = load_ingredient_map()
|
| 9 |
+
PARENTS = INGREDIENT_MAP["parents"]
|
| 10 |
+
CHILDREN = INGREDIENT_MAP["children"]
|
| 11 |
+
|
| 12 |
+
def extract_user_parents(user_ingredients):
|
| 13 |
+
"""Map user's ingredients to parent categories"""
|
| 14 |
+
user_parents = set()
|
| 15 |
+
for ing in user_ingredients:
|
| 16 |
+
ing_lower = ing.lower().strip()
|
| 17 |
+
if ing_lower in CHILDREN:
|
| 18 |
+
parent = CHILDREN[ing_lower]["parent"]
|
| 19 |
+
user_parents.add(parent)
|
| 20 |
+
elif ing_lower in PARENTS:
|
| 21 |
+
user_parents.add(ing_lower)
|
| 22 |
+
return user_parents
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
# def hard_filter(recipe, user_profile):
|
| 26 |
+
# diet = user_profile.get("diet", {}).get("vegetarian_type", "").lower()
|
| 27 |
+
# if diet == "vegan" and not recipe.get("is_vegan_safe", True):
|
| 28 |
+
# return False
|
| 29 |
+
# if diet in ["vegetarian", "flexible_vegetarian"] and not recipe.get("is_vegetarian_safe", True):
|
| 30 |
+
# return False
|
| 31 |
+
# return True
|
| 32 |
+
|
| 33 |
+
def hard_filter(recipe: dict, user_profile: dict) -> bool:
|
| 34 |
+
"""
|
| 35 |
+
Apply hard filters to determine whether a recipe matches the user's dietary profile.
|
| 36 |
+
|
| 37 |
+
Args:
|
| 38 |
+
recipe (dict): Recipe data containing attributes like 'calories', 'protein', 'is_vegan_safe', etc.
|
| 39 |
+
user_profile (dict): User preferences including diet type, nutritional goals, and disliked ingredients.
|
| 40 |
+
|
| 41 |
+
Returns:
|
| 42 |
+
bool: True if the recipe passes all hard filters, False otherwise.
|
| 43 |
+
"""
|
| 44 |
+
|
| 45 |
+
# --- Dietary filter ---
|
| 46 |
+
diet = user_profile.get("diet", {}).get("vegetarian_type", "").lower()
|
| 47 |
+
if diet == "vegan" and not recipe.get("is_vegan_safe", True):
|
| 48 |
+
return False
|
| 49 |
+
if diet in ["vegetarian", "flexible_vegetarian"] and not recipe.get("is_vegetarian_safe", True):
|
| 50 |
+
return False
|
| 51 |
+
|
| 52 |
+
# --- Nutritional goal filter ---
|
| 53 |
+
nutritional_goals = user_profile.get("nutritional_goals", {})
|
| 54 |
+
|
| 55 |
+
# Calorie range filter
|
| 56 |
+
cal_range = nutritional_goals.get("calories", {})
|
| 57 |
+
cal_min = cal_range.get("min", 0)
|
| 58 |
+
cal_max = cal_range.get("max", 9999)
|
| 59 |
+
recipe_calories = recipe.get("calories", 0)
|
| 60 |
+
|
| 61 |
+
if not (cal_min <= recipe_calories <= cal_max):
|
| 62 |
+
return False
|
| 63 |
+
|
| 64 |
+
# Protein range filter
|
| 65 |
+
protein_range = nutritional_goals.get("protein", {})
|
| 66 |
+
pro_min = protein_range.get("min", 0)
|
| 67 |
+
pro_max = protein_range.get("max", 999)
|
| 68 |
+
recipe_protein = recipe.get("protein", 0)
|
| 69 |
+
|
| 70 |
+
if not (pro_min <= recipe_protein <= pro_max):
|
| 71 |
+
return False
|
| 72 |
+
|
| 73 |
+
# --- Disliked main ingredients filter ---
|
| 74 |
+
disliked_main = set(user_profile.get("other_preferences", {}).get("disliked_main", []))
|
| 75 |
+
if disliked_main:
|
| 76 |
+
recipe_main = recipe.get("main_parent", set())
|
| 77 |
+
if isinstance(recipe_main, list):
|
| 78 |
+
recipe_main = set(recipe_main)
|
| 79 |
+
elif not isinstance(recipe_main, set):
|
| 80 |
+
recipe_main = set()
|
| 81 |
+
|
| 82 |
+
# Exclude if any main ingredient is in the disliked list
|
| 83 |
+
if recipe_main & disliked_main:
|
| 84 |
+
return False
|
| 85 |
+
|
| 86 |
+
return True
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
COARSE_WEIGHTS = {
|
| 91 |
+
"main_match_ratio": 1.0,
|
| 92 |
+
"staple_match_ratio": 0.3,
|
| 93 |
+
"other_match_ratio": 0.6,
|
| 94 |
+
"low_calorie_penalty": 0.2,
|
| 95 |
+
"preferred_course_overlap": 0.1,
|
| 96 |
+
"region_match": 0.8
|
| 97 |
+
}
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
def coarse_score(features, weights=COARSE_WEIGHTS):
|
| 101 |
+
score = 0.0
|
| 102 |
+
for key, w in weights.items():
|
| 103 |
+
if key in features:
|
| 104 |
+
score += w * features[key]
|
| 105 |
+
return score
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
def coarse_rank_candidates(recipes, user_parents, user_profile, top_n=30000, weights=COARSE_WEIGHTS):
|
| 109 |
+
"""
|
| 110 |
+
Stage 2: Coarse Ranking (NumPy vectorized implementation)
|
| 111 |
+
---------------------------------------------------------
|
| 112 |
+
Quickly retrieves a subset of candidate recipes by computing
|
| 113 |
+
ingredient coverage ratios (main / staple / other) between
|
| 114 |
+
the user's pantry and the recipes using vectorized operations.
|
| 115 |
+
|
| 116 |
+
This function replaces the original Python loop version
|
| 117 |
+
for significant speedup during cold start and real-time ranking.
|
| 118 |
+
"""
|
| 119 |
+
if not recipes:
|
| 120 |
+
return []
|
| 121 |
+
|
| 122 |
+
# === 1. Build parent vocabulary ===
|
| 123 |
+
# Extract all unique parent ingredients across main/staple/other fields.
|
| 124 |
+
all_parents = sorted({
|
| 125 |
+
p for r in recipes
|
| 126 |
+
for k in ["main_parent", "staple_parent", "other_parent"]
|
| 127 |
+
for p in (r.get(k) or [])
|
| 128 |
+
})
|
| 129 |
+
parent_index = {p: i for i, p in enumerate(all_parents)}
|
| 130 |
+
num_recipes = len(recipes)
|
| 131 |
+
num_parents = len(all_parents)
|
| 132 |
+
|
| 133 |
+
# === 2. Construct multi-hot matrices for main, staple, other ===
|
| 134 |
+
# Each row corresponds to a recipe; each column to a parent ingredient.
|
| 135 |
+
main_mat = np.zeros((num_recipes, num_parents), dtype=np.uint8)
|
| 136 |
+
staple_mat = np.zeros((num_recipes, num_parents), dtype=np.uint8)
|
| 137 |
+
other_mat = np.zeros((num_recipes, num_parents), dtype=np.uint8)
|
| 138 |
+
|
| 139 |
+
for i, r in enumerate(recipes):
|
| 140 |
+
for p in r.get("main_parent", []):
|
| 141 |
+
if p in parent_index:
|
| 142 |
+
main_mat[i, parent_index[p]] = 1
|
| 143 |
+
for p in r.get("staple_parent", []):
|
| 144 |
+
if p in parent_index:
|
| 145 |
+
staple_mat[i, parent_index[p]] = 1
|
| 146 |
+
for p in r.get("other_parent", []):
|
| 147 |
+
if p in parent_index:
|
| 148 |
+
other_mat[i, parent_index[p]] = 1
|
| 149 |
+
|
| 150 |
+
# === 3. Encode user pantry as a binary mask ===
|
| 151 |
+
user_mask = np.zeros(num_parents, dtype=np.uint8)
|
| 152 |
+
for p in user_parents:
|
| 153 |
+
if p in parent_index:
|
| 154 |
+
user_mask[parent_index[p]] = 1
|
| 155 |
+
|
| 156 |
+
# === 4. Compute ingredient match ratios in batch ===
|
| 157 |
+
# main_ratio = (# of matched main ingredients) / (# of total main ingredients)
|
| 158 |
+
main_total = main_mat.sum(axis=1)
|
| 159 |
+
staple_total = staple_mat.sum(axis=1)
|
| 160 |
+
other_total = other_mat.sum(axis=1)
|
| 161 |
+
|
| 162 |
+
main_match = (main_mat @ user_mask)
|
| 163 |
+
staple_match = (staple_mat @ user_mask)
|
| 164 |
+
other_match = (other_mat @ user_mask)
|
| 165 |
+
|
| 166 |
+
main_ratio = main_match / np.maximum(main_total, 1)
|
| 167 |
+
staple_ratio = staple_match / np.maximum(staple_total, 1)
|
| 168 |
+
other_ratio = other_match / np.maximum(other_total, 1)
|
| 169 |
+
|
| 170 |
+
# === 5. Additional coarse ranking signals ===
|
| 171 |
+
# Low-calorie preference & preferred cuisine overlap
|
| 172 |
+
calories = np.array([r.get("calories", 0) for r in recipes], dtype=float)
|
| 173 |
+
calorie_threshold = user_profile.get("calorie_threshold", 9999)
|
| 174 |
+
low_calorie_penalty = (calories <= calorie_threshold).astype(float)
|
| 175 |
+
|
| 176 |
+
preferred_course_types = set(user_profile.get("preferred_course_types", []))
|
| 177 |
+
preferred_overlap = np.array([
|
| 178 |
+
len(set(r.get("cuisine_attr", [])) & preferred_course_types)
|
| 179 |
+
for r in recipes
|
| 180 |
+
], dtype=float)
|
| 181 |
+
|
| 182 |
+
# Region preference matching
|
| 183 |
+
preferred_regions = set(user_profile.get("region_preference", []))
|
| 184 |
+
region_match = np.array([
|
| 185 |
+
1.0 if any(region in preferred_regions for region in
|
| 186 |
+
(r.get("region", []) if isinstance(r.get("region"), (list, set))
|
| 187 |
+
else [r.get("region", "")]))
|
| 188 |
+
else 0.0
|
| 189 |
+
for r in recipes
|
| 190 |
+
], dtype=float)
|
| 191 |
+
|
| 192 |
+
# === 6. Compute coarse ranking scores ===
|
| 193 |
+
scores = (
|
| 194 |
+
weights["main_match_ratio"] * main_ratio +
|
| 195 |
+
weights["staple_match_ratio"] * staple_ratio +
|
| 196 |
+
weights["other_match_ratio"] * other_ratio +
|
| 197 |
+
weights["low_calorie_penalty"] * low_calorie_penalty +
|
| 198 |
+
weights["preferred_course_overlap"] * preferred_overlap +
|
| 199 |
+
weights.get("region_match", 0) * region_match
|
| 200 |
+
)
|
| 201 |
+
|
| 202 |
+
# === 7. Select top-N candidates ===
|
| 203 |
+
valid_idx = np.where(scores > 0)[0]
|
| 204 |
+
if valid_idx.size == 0:
|
| 205 |
+
return []
|
| 206 |
+
|
| 207 |
+
scores_valid = scores[valid_idx]
|
| 208 |
+
topk = min(top_n, valid_idx.size)
|
| 209 |
+
|
| 210 |
+
# Optional dynamic thresholding: keep candidates with score >= 50% of max
|
| 211 |
+
max_score = scores_valid.max()
|
| 212 |
+
keep_mask = scores_valid >= max_score * 0.5
|
| 213 |
+
keep_idx = valid_idx[keep_mask]
|
| 214 |
+
|
| 215 |
+
if keep_idx.size == 0:
|
| 216 |
+
return []
|
| 217 |
+
|
| 218 |
+
order = np.argsort(scores[keep_idx])[::-1]
|
| 219 |
+
top_idx = keep_idx[order[:topk]]
|
| 220 |
+
|
| 221 |
+
# Return the original recipe dicts corresponding to the top candidates
|
| 222 |
+
return [recipes[i] for i in top_idx]
|
| 223 |
+
|
| 224 |
+
|
| 225 |
+
def rule_generate_candidates(df, user_parents, user_profile):
|
| 226 |
+
"""
|
| 227 |
+
Step 3: Rule-based reranking of coarse candidates.
|
| 228 |
+
Uses all available features (except vegan/vegetarian filters, which were applied in Step 1)
|
| 229 |
+
to compute a weighted rule-based score for each recipe.
|
| 230 |
+
"""
|
| 231 |
+
|
| 232 |
+
def score(row):
|
| 233 |
+
# Build recipe_dict for feature extraction
|
| 234 |
+
recipe_dict = {
|
| 235 |
+
"main": row.get("main_parent", set()),
|
| 236 |
+
"staple": row.get("staple_parent", set()),
|
| 237 |
+
"other": row.get("other_parent", set()),
|
| 238 |
+
"seasoning": row.get("seasoning_parent", set()),
|
| 239 |
+
"matched_main": len(row.get("main_parent", set()) & set(user_parents)),
|
| 240 |
+
"matched_staple": len(row.get("staple_parent", set()) & set(user_parents)),
|
| 241 |
+
"matched_other": len(row.get("other_parent", set()) & set(user_parents)),
|
| 242 |
+
"calories": row.get("calories", 0),
|
| 243 |
+
"protein": row.get("protein", 0),
|
| 244 |
+
"fat": row.get("fat", 0),
|
| 245 |
+
"region": row.get("region", ""),
|
| 246 |
+
"cuisine_attr": row.get("cuisine_attr", []),
|
| 247 |
+
"ingredients": row.get("ingredients", []),
|
| 248 |
+
"minutes": row.get("minutes", None),
|
| 249 |
+
}
|
| 250 |
+
|
| 251 |
+
# Extract rule features
|
| 252 |
+
feats = build_features(recipe_dict, user_profile)
|
| 253 |
+
|
| 254 |
+
# Compute rule-based score
|
| 255 |
+
score = 0.0
|
| 256 |
+
|
| 257 |
+
# Ingredient match ratios
|
| 258 |
+
# Main ingredients are weighted most heavily
|
| 259 |
+
score += 2.0 * feats["main_match_ratio"]
|
| 260 |
+
score += 1.0 * feats["staple_match_ratio"]
|
| 261 |
+
score += 1.0 * feats["other_match_ratio"]
|
| 262 |
+
|
| 263 |
+
# Nutrition preferences
|
| 264 |
+
        # Low calorie preference
        if user_profile.get("low_calorie", False):
            if feats["low_calorie_penalty"]:
                score += 0.5

        # High protein preference
        if user_profile.get("high_protein", False) and feats["protein_ratio"] > 0.25:
            score += 0.3

        # Low fat preference (penalty if fat ratio is too high)
        if user_profile.get("low_fat", False) and feats["fat_ratio"] > 0.35:
            score -= 0.3

        # Region / cuisine / main-type preferences
        score += 0.5 * feats["region_match"]
        score += 0.4 * feats["preferred_course_overlap"]
        score += 0.3 * feats["preferred_main_overlap"]

        # Cooking time preference
        score += 0.3 * feats["within_cooking_time"]

        # Missing ingredients penalty
        # Minor penalty for missing main ingredients (after coarse filtering this is usually small)
        score -= 0.2 * feats["missing_main_count"]

        return max(score, 0.0)

    # Apply scoring over the coarse candidate DataFrame
    df = df.copy()
    df["match_score"] = df.apply(score, axis=1)
    df = df[df["match_score"] > 0]
    if df.empty:
        return df
    df = df.sort_values("match_score", ascending=False).reset_index(drop=True)

    return df


def ml_generate_candidates(coarse_candidates, user_parents, user_profile, model_path, topk=5):
    """
    Step 3: ML-based reranking (directly after Step 2).
    Instead of rule-based prefiltering, use the coarse-ranked candidates (Step 2 output),
    build features in the same format as training, and apply the trained ML model to rerank.
    """

    # Handle empty input
    if coarse_candidates is None or len(coarse_candidates) == 0:
        print("No candidates provided for ML reranking.")
        return pd.DataFrame()

    # If input is a list of dicts (from coarse_rank_candidates), convert to DataFrame
    if isinstance(coarse_candidates, list):
        df = pd.DataFrame(coarse_candidates)
    else:
        df = coarse_candidates.copy()

    if df.empty:
        print("Coarse candidates DataFrame is empty.")
        return df

    # Load trained model
    model = joblib.load(model_path)

    # Build feature DataFrame
    feature_rows = []
    for _, row in df.iterrows():
        recipe_dict = {
            "main": row.get("main_parent", set()),
            "staple": row.get("staple_parent", set()),
            "other": row.get("other_parent", set()),
            "seasoning": row.get("seasoning_parent", set()),
            "matched_main": len(row.get("main_parent", set()) & set(user_parents)),
            "matched_staple": len(row.get("staple_parent", set()) & set(user_parents)),
            "matched_other": len(row.get("other_parent", set()) & set(user_parents)),
            "calories": row.get("calories", 0),
            "protein": row.get("protein", 0),
            "fat": row.get("fat", 0),
            "region": row.get("region", ""),
            "cuisine_attr": row.get("cuisine_attr", []),
            "ingredients": row.get("ingredients", []),
            "minutes": row.get("minutes", None),
        }
        feats = build_features(recipe_dict, user_profile)
        feature_rows.append(feats)

    feature_df = pd.DataFrame(feature_rows)

    # Predict ML scores
    if hasattr(model, "predict_proba"):
        df["ml_score"] = model.predict_proba(feature_df)[:, 1]
    else:
        df["ml_score"] = model.predict(feature_df)

    # Normalize scores to [0, 1]
    if len(df) > 0 and df["ml_score"].max() > df["ml_score"].min():
        df["ml_score"] = (df["ml_score"] - df["ml_score"].min()) / (df["ml_score"].max() - df["ml_score"].min())

    # Sort by ML score and return top-k candidates
    return df.sort_values("ml_score", ascending=False).head(topk).reset_index(drop=True)
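Usage note: Steps 2 and 3 chain directly: coarse recall produces a candidate list, and ml_generate_candidates reranks it with the per-user model. A minimal sketch of that wiring, mirroring the column parsing done in cold_start_ranker; the pantry and profile values are illustrative, and user_data/user_1/ranker.pkl is the per-user model path used elsewhere in this repo:

# Hypothetical wiring of Step 2 -> Step 3; pantry and profile values are illustrative.
import pandas as pd
from recipe_recommendation.src.candidate import coarse_rank_candidates, ml_generate_candidates
from recipe_recommendation.src.coldstart import parse_set, parse_list
from recipe_recommendation.src.io import load_recipes_csv

df = pd.read_csv(load_recipes_csv())
for c in ["main_parent", "staple_parent", "other_parent", "seasoning_parent", "cuisine_attr"]:
    df[c] = df[c].apply(parse_set)       # stringified sets -> Python sets
df["ingredients"] = df["ingredients"].apply(parse_list)

user_parents = ["chicken", "rice", "onion"]   # pantry, parent-level ingredient names
user_profile = {"high_protein": True}         # minimal profile stub

coarse = coarse_rank_candidates(
    recipes=df.to_dict(orient="records"),
    user_parents=user_parents,
    user_profile=user_profile,
    top_n=500,
)
top5 = ml_generate_candidates(
    coarse, user_parents, user_profile,
    model_path="user_data/user_1/ranker.pkl",
    topk=5,
)
print(top5[["name", "ml_score"]])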
recipe_recommendation/src/coldstart.py
ADDED
@@ -0,0 +1,279 @@
import os
import ast
import json
import random
import pandas as pd
import numpy as np
from tqdm import tqdm
import warnings

from .candidate import coarse_rank_candidates, hard_filter, rule_generate_candidates
from .feature import build_features
from .io import load_recipes_csv, load_ingredient_map

RECIPES_PATH = load_recipes_csv()
INGREDIENT_MAP = load_ingredient_map()
PARENTS = INGREDIENT_MAP["parents"]
CHILDREN = INGREDIENT_MAP["children"]

def parse_list(x):
    """Convert a stringified list into a Python list safely."""
    if pd.isna(x) or x == "":
        return []
    if isinstance(x, list):
        return x
    try:
        return ast.literal_eval(x)
    except Exception:
        return []

def parse_set(x):
    """Convert a stringified collection into a Python set safely."""
    if pd.isna(x) or x == "":
        return set()
    if isinstance(x, set):
        return x
    if isinstance(x, (list, tuple)):
        return set(x)
    if isinstance(x, str):
        try:
            v = ast.literal_eval(x)
            if isinstance(v, (list, tuple, set)):
                return set(v)
            return {v}
        except Exception:
            return {x.strip()}
    return {x}

def _parents_pool_from_df(df: pd.DataFrame):
    cols = ["main_parent", "staple_parent", "other_parent", "seasoning_parent"]
    pool = set()
    for c in cols:
        if c in df.columns:
            for s in df[c]:
                pool |= set(s) if isinstance(s, (set, list, tuple)) else set()
    return sorted(pool)


def sample_user_parents(parents_pool,
                        user_profile=None,
                        prev_inventory=None,
                        min_items=3, max_items=10,
                        keep_ratio=0.6, reset_interval=20, round_idx=0):
    liked = set((user_profile or {}).get("other_preferences", {}).get("preferred_main", []))
    disliked = set((user_profile or {}).get("other_preferences", {}).get("disliked_main", []))
    forbidden = set((user_profile or {}).get("forbidden_parents", [])) | disliked

    pool, weights = [], []
    for p in parents_pool:
        if p in forbidden:
            continue
        w = 3.0 if p in liked else 1.0
        pool.append(p); weights.append(w)
    if not pool:
        pool, weights = parents_pool[:], [1.0] * len(parents_pool)

    inventory = set()
    force_reset = (round_idx % reset_interval == 0)
    if prev_inventory and not force_reset:
        prev_list = list(prev_inventory); random.shuffle(prev_list)
        keep_k = max(0, int(len(prev_list) * keep_ratio))
        inventory |= set(prev_list[:keep_k])

    k = random.randint(min_items, max_items)
    remain = max(0, k - len(inventory))
    for _ in range(min(remain, len(pool))):
        idx = random.choices(range(len(pool)), weights=weights, k=1)[0]
        inventory.add(pool[idx])
    return list(inventory)


def _weighted_pick3(indexes, scores, temperature=1.0):
    idxs = list(indexes)
    scs = np.array(scores, dtype=float)
    if np.any(scs < 0):
        scs = scs - scs.min()
    if scs.sum() == 0:
        scs = np.ones_like(scs)
    picks = []
    for _ in range(min(3, len(idxs))):
        probs = np.exp(scs / max(temperature, 1e-6))
        probs = probs / probs.sum()
        choice = np.random.choice(len(idxs), p=probs)
        picks.append(idxs[choice])
        idxs.pop(choice)
        scs = np.delete(scs, choice)
        if len(idxs) == 0:
            break
    return picks


# ---------- Main cold-start ----------
def cold_start_ranker(user_id: str,
                      n_rounds: int = 10000,
                      topn_coarse: int = 5000,
                      topk_rule: int = 5,
                      batch_size: int = 5000,
                      switch_interval: int = 100):
    """
    Cold-start data generation for learning-to-rank.
    Top-5 selection prioritizes user pantry coverage deterministically:
      1. Fully covered recipes first (missing_count == 0)
      2. Then few missing (esp. staple/other)
      3. Heavy penalty for missing main ingredients.
    """
    base_dir = os.path.join("user_data", user_id)
    os.makedirs(base_dir, exist_ok=True)
    profile_path = os.path.join(base_dir, "user_profile.json")
    features_path = os.path.join(base_dir, "user_features_rank.csv")

    if os.path.exists(features_path):
        print(f"[cold_start] Features already exist at {features_path}")
        return features_path

    with open(profile_path, "r", encoding="utf-8") as f:
        user_profile = json.load(f)

    # Load and parse recipes
    df_all = pd.read_csv(RECIPES_PATH)
    to_set = ["main_parent", "staple_parent", "other_parent", "seasoning_parent", "cuisine_attr"]
    to_list = ["ingredients"]
    for c in to_set:
        if c in df_all.columns:
            df_all[c] = df_all[c].apply(parse_set)
    for c in to_list:
        if c in df_all.columns:
            df_all[c] = df_all[c].apply(parse_list)

    # Step 1 hard filter
    if hard_filter is not None:
        try:
            before = len(df_all)
            mask = df_all.apply(lambda r: hard_filter(r.to_dict(), user_profile), axis=1)
            df_all = df_all[mask]
            after = len(df_all)
            print(f"[cold_start] Step1 hard filter applied: {before} -> {after}")
        except Exception as e:
            warnings.warn(f"[cold_start] hard_filter failed, skip. err={e}")

    n_chunks = (len(df_all) // batch_size) + 1
    chunks = np.array_split(df_all, n_chunks)
    parents_pool = _parents_pool_from_df(df_all)
    rows = []
    prev_inventory = None

    for i in tqdm(range(n_rounds), desc="Cold-start rounds"):
        chunk_id = (i // switch_interval) % n_chunks
        df_chunk = chunks[chunk_id].copy()

        # pantry sampling
        user_parents = sample_user_parents(
            parents_pool,
            user_profile=user_profile,
            prev_inventory=prev_inventory,
            round_idx=i
        )
        prev_inventory = user_parents

        # Step 2: coarse recall
        coarse_list = coarse_rank_candidates(
            recipes=df_chunk.to_dict(orient="records"),
            user_parents=user_parents,
            user_profile=user_profile,
            top_n=min(topn_coarse, len(df_chunk))
        )
        if not coarse_list:
            continue

        coarse_df = pd.DataFrame(coarse_list)

        # Step 3: rule rerank → Top-5 candidates (just for selecting the 5)
        rule_df = rule_generate_candidates(
            coarse_df,
            user_parents=user_parents,
            user_profile=user_profile
        )
        if rule_df.empty or len(rule_df) < topk_rule:
            continue

        top5 = rule_df.head(topk_rule).copy()

        # ===== Deterministic scoring with main-ingredient priority =====
        user_set = set(user_parents)
        weighted_scores = []
        for idx, row in top5.iterrows():
            main_set = set(row.get("main_parent", set()))
            staple_set = set(row.get("staple_parent", set()))
            other_set = set(row.get("other_parent", set()))

            main_missing = len(main_set - user_set)
            staple_missing = len(staple_set - user_set)
            other_missing = len(other_set - user_set)

            weighted_missing = 10 * main_missing + 2 * staple_missing + 1 * other_missing
            total_missing = main_missing + staple_missing + other_missing

            weighted_scores.append((idx, weighted_missing, total_missing))

        sorted_pairs = sorted(weighted_scores, key=lambda x: (x[1], x[2]))
        picked_idxs = [idx for idx, _, _ in sorted_pairs[:3]]

        # relevance 3 / 2 / 1
        labels = {idx: 0 for idx in top5.index}
        if len(picked_idxs) > 0:
            labels[picked_idxs[0]] = 3
        if len(picked_idxs) > 1:
            labels[picked_idxs[1]] = 2
        if len(picked_idxs) > 2:
            labels[picked_idxs[2]] = 1

        # build features for all 5 candidates
        for idx, row in top5.iterrows():
            up = set(user_parents)
            main_set = set(row.get("main_parent", set()))
            staple_set = set(row.get("staple_parent", set()))
            other_set = set(row.get("other_parent", set()))

            recipe_dict = {
                "main": main_set,
                "staple": staple_set,
                "other": other_set,
                "seasoning": set(row.get("seasoning_parent", set())),
                "matched_main": len(main_set & up),
                "matched_staple": len(staple_set & up),
                "matched_other": len(other_set & up),
                "calories": row.get("calories", 0),
                "protein": row.get("protein", 0),
                "fat": row.get("fat", 0),
                "region": row.get("region", ""),
                "cuisine_attr": row.get("cuisine_attr", []),
                "ingredients": row.get("ingredients", []),
                "minutes": row.get("minutes", None),
            }

            feats = build_features(recipe_dict, user_profile)
            feats["relevance"] = float(labels[idx])
            feats["qid"] = int(i)
            rows.append(feats)

    out = pd.DataFrame(rows)
    valid_qids = out.groupby("qid").size()
    keep_qids = valid_qids[valid_qids > 1].index
    out = out[out["qid"].isin(keep_qids)].reset_index(drop=True)

    out_path = os.path.join("user_data", user_id, "user_features_rank.csv")
    out.to_csv(out_path, index=False)
    print(f"[cold_start] Saved {len(out)} rows to {out_path}")
    return out_path


if __name__ == "__main__":
    cold_start_ranker(
        user_id="user_1",
        n_rounds=10000,
        topn_coarse=20000,
        topk_rule=5,
    )
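The relevance labels above come entirely from pantry coverage: each rule-ranked top-5 candidate is scored by a weighted missing-ingredient count (main 10, staple 2, other 1), ties are broken by the raw total, and the three best receive grades 3/2/1. A self-contained toy illustration of that ordering (the recipe sets are made up):

# Toy illustration of the deterministic 3/2/1 labeling used in cold_start_ranker.
user_set = {"chicken", "rice", "onion"}
candidates = {
    "A": {"main": {"chicken"}, "staple": {"rice"}, "other": {"onion", "garlic"}},
    "B": {"main": {"beef"}, "staple": {"rice"}, "other": {"onion"}},
    "C": {"main": {"chicken"}, "staple": {"noodles"}, "other": {"carrot", "celery"}},
}

scored = []
for name, r in candidates.items():
    main_missing = len(r["main"] - user_set)
    staple_missing = len(r["staple"] - user_set)
    other_missing = len(r["other"] - user_set)
    weighted = 10 * main_missing + 2 * staple_missing + 1 * other_missing
    total = main_missing + staple_missing + other_missing
    scored.append((name, weighted, total))

# Sort exactly as in cold_start_ranker: weighted count first, total as tie-break.
for rank, (name, w, t) in enumerate(sorted(scored, key=lambda x: (x[1], x[2]))):
    print(name, "relevance =", max(3 - rank, 0))
# A -> 3, C -> 2, B -> 1 (weighted misses 1, 4, 10 respectively)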
recipe_recommendation/src/embedding.py
ADDED
@@ -0,0 +1,100 @@
import os
import json
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

def profile_to_embedding(profile):
    """
    Convert a normalized user profile into a fixed-length numeric embedding.
    Embedding structure:
        [diet (3)] + [allergies (6)] + [region (6)] +
        [nutritional goals (4)] + [preferred_main (8)] + [cooking_time (1)]
    Total dim = 28
    """
    vecs = []

    # 1. Diet (one-hot)
    diet_types = ["vegetarian", "flexible", "non_vegetarian"]
    diet_vec = np.zeros(len(diet_types))
    diet_value = profile.get("diet", {}).get("vegetarian_type", "flexible")
    if diet_value in diet_types:
        diet_vec[diet_types.index(diet_value)] = 1
    vecs.append(diet_vec)

    # 2. Allergies (multi-hot)
    allergy_vocab = ["milk", "gluten", "peanut", "shrimp", "egg", "soy"]
    allergies = set(profile.get("allergies", []))
    allergy_vec = np.array([1 if a in allergies else 0 for a in allergy_vocab])
    vecs.append(allergy_vec)

    # 3. Region preferences (multi-hot)
    region_vocab = ["North America", "Latin America", "Europe", "Asia", "Middle East", "Africa"]
    regions = set(profile.get("region_preference", []))
    region_vec = np.array([1 if r in regions else 0 for r in region_vocab])
    vecs.append(region_vec)

    # 4. Nutritional goals (normalized)
    ng = profile.get("nutritional_goals", {})
    cal = ng.get("calories", {})
    pro = ng.get("protein", {})

    cal_min = cal.get("min", 0) / 4000
    cal_max = min(cal.get("max", 9999), 4000) / 4000
    pro_min = pro.get("min", 0) / 300
    pro_max = min(pro.get("max", 999), 300) / 300

    vecs.append(np.array([cal_min, cal_max, pro_min, pro_max]))

    # 5. Preferred main ingredients (multi-hot)
    main_vocab = ["chicken", "tofu", "beef", "salmon", "eggs", "pork", "beans", "mushroom"]
    mains = set(profile.get("other_preferences", {}).get("preferred_main", []))
    main_vec = np.array([1 if m in mains else 0 for m in main_vocab])
    vecs.append(main_vec)

    # 6. Cooking time max (normalized to [0, 1], assume 120 min upper bound)
    t = profile.get("other_preferences", {}).get("cooking_time_max")
    t_vec = np.array([min(t / 120, 1)]) if t is not None else np.array([0])
    vecs.append(t_vec)

    return np.concatenate(vecs)


def profile_similarity(profile_a, profile_b):
    """Compute cosine similarity between two user profiles."""
    emb_a = profile_to_embedding(profile_a).reshape(1, -1)
    emb_b = profile_to_embedding(profile_b).reshape(1, -1)
    return cosine_similarity(emb_a, emb_b)[0, 0]

def find_most_similar_user(target_user_id, user_data_dir="recipe_recommendation/user_data", threshold=0.85):
    """
    Find the most similar existing user based on profile embeddings.
    Returns (best_match_user_id, similarity_score) or (None, -1) if no match.
    """
    target_profile_path = os.path.join(user_data_dir, target_user_id, "user_profile.json")
    if not os.path.exists(target_profile_path):
        raise FileNotFoundError(f"[embedding] No profile found for user {target_user_id}")

    with open(target_profile_path, "r", encoding="utf-8") as f:
        target_profile = json.load(f)
    target_emb = profile_to_embedding(target_profile).reshape(1, -1)

    best_match, best_score = None, -1

    for uid in os.listdir(user_data_dir):
        if uid == target_user_id:
            continue
        profile_path = os.path.join(user_data_dir, uid, "user_profile.json")
        if not os.path.exists(profile_path):
            continue
        with open(profile_path, "r", encoding="utf-8") as f:
            other_profile = json.load(f)
        other_emb = profile_to_embedding(other_profile).reshape(1, -1)
        sim = cosine_similarity(target_emb, other_emb)[0, 0]
        if sim > best_score:
            best_match, best_score = uid, sim

    if best_match and best_score >= threshold:
        print(f"[embedding] Found similar user: {best_match} (similarity={best_score:.3f})")
        return best_match, best_score

    return None, -1
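A quick sanity check of the embedding-based similarity, using two hypothetical profiles that differ only in one allergy bit; nearly identical 28-dim vectors should score close to (but below) 1.0:

# Minimal check of profile_similarity on two hypothetical profiles.
from recipe_recommendation.src.embedding import profile_similarity

p1 = {
    "diet": {"vegetarian_type": "flexible"},
    "allergies": ["milk"],
    "region_preference": ["Asia"],
    "nutritional_goals": {"calories": {"min": 200, "max": 800},
                          "protein": {"min": 20, "max": 100}},
    "other_preferences": {"preferred_main": ["chicken"], "cooking_time_max": 30},
}
p2 = dict(p1, allergies=[])  # same profile with the milk allergy removed

print(f"{profile_similarity(p1, p2):.3f}")  # high, but strictly below 1.0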
recipe_recommendation/src/feature.py
ADDED
@@ -0,0 +1,176 @@
import numpy as np
from .io import load_ingredient_map

# Load ingredient map globally to avoid repeated I/O
INGREDIENT_MAP = load_ingredient_map()
PARENTS = INGREDIENT_MAP["parents"]
CHILDREN = INGREDIENT_MAP["children"]


def is_recipe_vegetarian_safe(ingredients: list[str], veg_type: str) -> bool:
    """
    Check if the recipe is safe for a given dietary type.
    Supported veg_type: "vegan", "vegetarian", "flexible_vegetarian", "" (none).
    """
    for ing in ingredients:
        ing_lower = ing.strip().lower()
        if ing_lower in CHILDREN:
            info = CHILDREN[ing_lower]
        elif ing_lower in PARENTS:
            info = PARENTS[ing_lower]
        else:
            # If the ingredient is not found in the map, treat it as safe by default.
            continue

        if veg_type == "vegan" and not info.get("vegan_safe", True):
            return False
        if veg_type == "vegetarian" and not info.get("vegetarian_safe", True):
            return False
        if veg_type == "flexible_vegetarian":
            # Flexible vegetarians allow most ingredients except explicit meat;
            # vegetarian_safe serves as a proxy for that flexibility.
            if not info.get("vegetarian_safe", True):
                return False
    return True


def build_features(recipe: dict, user_profile: dict) -> dict:
    """
    Build a feature dictionary for the ML ranker and rule-based scoring.
    All features are numeric scalars or counts.
    """
    features = {}

    # Ingredient matching ratios
    total_main = len(recipe.get("main", []))
    total_other = len(recipe.get("other", []))
    total_staple = len(recipe.get("staple", []))

    features["main_match_ratio"] = recipe.get("matched_main", 0) / max(total_main, 1)
    features["other_match_ratio"] = recipe.get("matched_other", 0) / max(total_other, 1)
    features["staple_match_ratio"] = recipe.get("matched_staple", 0) / max(total_staple, 1)

    features["missing_main_count"] = total_main - recipe.get("matched_main", 0)
    features["missing_other_count"] = total_other - recipe.get("matched_other", 0)
    features["missing_staple_count"] = total_staple - recipe.get("matched_staple", 0)

    # Nutrition information
    calories = recipe.get("calories", 0)
    protein = recipe.get("protein", 0)
    fat = recipe.get("fat", 0)
    features["calories"] = calories
    features["protein"] = protein
    features["fat"] = fat
    features["protein_ratio"] = protein / max(calories, 1)
    features["fat_ratio"] = fat / max(calories, 1)

    # Regional preference
    recipe_region = recipe.get("region", "")
    if isinstance(recipe_region, set):
        features["region_match"] = int(any(
            r in user_profile.get("preferred_regions", []) for r in recipe_region
        ))
    else:
        features["region_match"] = int(
            recipe_region in user_profile.get("preferred_regions", [])
        )

    # Diet constraints
    ingredients_all = recipe.get("ingredients", [])

    # Vegan-safe check (absolute, independent of user)
    features["is_vegan_safe"] = int(is_recipe_vegetarian_safe(ingredients_all, "vegan"))

    # Vegetarian-safe check (absolute, independent of user)
    features["is_vegetarian_safe_absolute"] = int(
        is_recipe_vegetarian_safe(ingredients_all, "vegetarian")
    )

    # Flexible vegetarian-safe check (absolute, independent of user)
    features["is_flexible_safe_absolute"] = int(
        is_recipe_vegetarian_safe(ingredients_all, "flexible_vegetarian")
    )

    # User diet safety (depends on user_profile)
    veg_type = (user_profile.get("diet", {}).get("vegetarian_type", "") or "").lower()
    features["is_user_diet_safe"] = int(is_recipe_vegetarian_safe(ingredients_all, veg_type))

    # Calorie preference: 1 if within the user's calorie threshold.
    # (Despite the name, this flag acts as a bonus indicator in rule scoring.)
    calorie_threshold = user_profile.get("calorie_threshold", 9999)
    features["low_calorie_penalty"] = int(calories <= calorie_threshold)

    # Main ingredient preference
    preferred_main = set(user_profile.get("other_preferences", {}).get("preferred_main", []))
    recipe_main = set(recipe.get("main", []))
    features["preferred_main_overlap"] = len(recipe_main & preferred_main)

    # Course type preference
    # e.g. user may prefer 'main-dish' or 'desserts'
    recipe_types = set(recipe.get("cuisine_attr", []))
    preferred_types = set(user_profile.get("preferred_course_types", []))
    features["preferred_course_overlap"] = len(recipe_types & preferred_types)

    # Cooking time preference
    cooking_time_max = user_profile.get("other_preferences", {}).get("cooking_time_max", None)
    if cooking_time_max:
        features["within_cooking_time"] = int(recipe.get("minutes", 9999) <= cooking_time_max)
    else:
        features["within_cooking_time"] = 1

    return features

def build_cluster_features(candidates):
    """
    Build simple ingredient + cuisine based feature vectors for KMeans clustering.
    This is separate from model training features.

    Args:
        candidates (list[dict]): list of recipe dicts.

    Returns:
        np.ndarray: feature matrix (num_candidates, num_features)
    """
    # 1. Collect vocabulary for ingredients and cuisine
    all_main = set()
    all_staple = set()
    all_other = set()
    all_cuisine = set()

    for r in candidates:
        all_main.update(r.get("main_parent", []) or [])
        all_staple.update(r.get("staple_parent", []) or [])
        all_other.update(r.get("other_parent", []) or [])
        all_cuisine.update(r.get("cuisine_attr", []) or [])

    main_vocab = sorted(all_main)
    staple_vocab = sorted(all_staple)
    other_vocab = sorted(all_other)
    cuisine_vocab = sorted(all_cuisine)

    # 2. Build index map
    main_idx = {p: i for i, p in enumerate(main_vocab)}
    staple_idx = {p: i + len(main_vocab) for i, p in enumerate(staple_vocab)}
    other_idx = {p: i + len(main_vocab) + len(staple_vocab) for i, p in enumerate(other_vocab)}
    cuisine_idx = {p: i + len(main_vocab) + len(staple_vocab) + len(other_vocab)
                   for i, p in enumerate(cuisine_vocab)}

    dim = len(main_vocab) + len(staple_vocab) + len(other_vocab) + len(cuisine_vocab)
    X = np.zeros((len(candidates), dim), dtype=np.uint8)

    # 3. Fill feature matrix
    for i, r in enumerate(candidates):
        for p in r.get("main_parent", []) or []:
            if p in main_idx:
                X[i, main_idx[p]] = 1
        for p in r.get("staple_parent", []) or []:
            if p in staple_idx:
                X[i, staple_idx[p]] = 1
        for p in r.get("other_parent", []) or []:
            if p in other_idx:
                X[i, other_idx[p]] = 1
        for p in r.get("cuisine_attr", []) or []:
            if p in cuisine_idx:
                X[i, cuisine_idx[p]] = 1

    return X
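The match-ratio and nutrition-ratio features above are simple guarded divisions; max(x, 1) avoids division by zero for empty categories or zero-calorie rows. The arithmetic in isolation, on a made-up recipe dict:

# Standalone arithmetic behind the match-ratio features (no ingredient map needed).
recipe = {"main": {"chicken", "beef"}, "matched_main": 1,
          "calories": 500, "protein": 40, "fat": 15}

total_main = len(recipe["main"])                                  # 2
main_match_ratio = recipe["matched_main"] / max(total_main, 1)    # 0.5
missing_main_count = total_main - recipe["matched_main"]          # 1
protein_ratio = recipe["protein"] / max(recipe["calories"], 1)    # 0.08
fat_ratio = recipe["fat"] / max(recipe["calories"], 1)            # 0.03
print(main_match_ratio, missing_main_count, protein_ratio, fat_ratio)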
recipe_recommendation/src/highlight.py
ADDED
@@ -0,0 +1,91 @@
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import numpy as np


def print_candidates(candidates, user_parents, topk=10):
    """Pretty-print the top-k candidates with pantry-coverage markers."""
    max_score = candidates['match_score'].max()

    for _, row in candidates.head(topk).iterrows():
        scaled_score = 100 * row['match_score'] / (max_score + 1e-9)
        print(f"{row['name']} (score {scaled_score:.1f}%)")

        # ----- Region -----
        region = row.get("region", None)
        if pd.notna(region) and isinstance(region, str) and region.strip() and region.lower() != "unavailable":
            print(f" region: {region}")

        # ----- Cuisine Attributes -----
        cuisine = row.get("cuisine_attr", None)
        if cuisine is not None and not (isinstance(cuisine, float) and pd.isna(cuisine)):
            # Convert set to list for printing
            if isinstance(cuisine, set):
                cuisine = list(cuisine)
            elif isinstance(cuisine, str):
                cuisine = [cuisine]

            if isinstance(cuisine, list) and len(cuisine) > 0:
                print(f" cuisine: {', '.join(cuisine)}")

        # ----- Nutrition -----
        print(f" calories: {row.get('calories', 'N/A')}")

        # ----- Ingredient Marking -----
        def mark_list(lst):
            return [("✅ " + ing) if ing in user_parents else ("❌ " + ing) for ing in lst]

        print(f" staple: {mark_list(row.get('staple_parent', []))}")
        print(f" main: {mark_list(row.get('main_parent', []))}")
        print(f" seasoning: {row.get('seasoning_parent', [])}")
        print(f" other: {mark_list(row.get('other_parent', []))}")
        print("-" * 40)

def diversify_topk_with_min_clusters(
    ranked_candidates,
    feature_matrix,
    top_k=5,
    n_clusters=20,
    min_clusters=3,
    random_state=42
):
    """
    Diversify top-k displayed recipes using KMeans clustering.
    Ensures that the final top_k contains at least `min_clusters` distinct clusters.
    """
    if len(ranked_candidates) == 0:
        return []

    n_clusters = min(n_clusters, len(ranked_candidates))
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(feature_matrix)

    # KMeans clustering
    kmeans = KMeans(n_clusters=n_clusters, n_init='auto', random_state=random_state)
    cluster_ids = kmeans.fit_predict(X_scaled)

    # Step 1: pick candidates from distinct clusters until min_clusters reached
    picked = []
    picked_clusters = set()
    for i, c in enumerate(cluster_ids):
        if c not in picked_clusters:
            picked.append(ranked_candidates[i])
            picked_clusters.add(c)
        if len(picked_clusters) >= min_clusters or len(picked) >= top_k:
            break

    # Step 2: fill the rest purely by rank order
    if len(picked) < top_k:
        for i, c in enumerate(cluster_ids):
            if ranked_candidates[i] not in picked:
                picked.append(ranked_candidates[i])
            if len(picked) >= top_k:
                break

    return picked
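The diversification pass first takes the highest-ranked candidate from each new cluster until min_clusters clusters are represented, then backfills remaining slots purely by rank. A toy run on synthetic 2-D features (three well-separated blobs; list order stands in for rank order):

# Toy run of diversify_topk_with_min_clusters on synthetic 2-D features.
import numpy as np
from recipe_recommendation.src.highlight import diversify_topk_with_min_clusters

rng = np.random.default_rng(0)
# 12 candidates in three loose 2-D blobs; index order = rank order.
X = np.vstack([rng.normal(c, 0.1, size=(4, 2)) for c in ([0, 0], [5, 5], [10, 0])])
candidates = [{"name": f"recipe_{i}"} for i in range(len(X))]

picked = diversify_topk_with_min_clusters(candidates, X, top_k=5, n_clusters=3, min_clusters=3)
print([p["name"] for p in picked])  # first three picks come from distinct clusters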
recipe_recommendation/src/io.py
ADDED
@@ -0,0 +1,37 @@
import os
import json
from huggingface_hub import hf_hub_download

# Hugging Face dataset repo ID
REPO_ID = "Iris314/recipe-cleaned"

ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
DATA_DIR = os.path.join(ROOT_DIR, "data")
os.makedirs(DATA_DIR, exist_ok=True)


def download_file(filename: str) -> str:
    """Download `filename` from the Hub into DATA_DIR (once) and return its local path."""
    local_path = os.path.join(DATA_DIR, filename)
    if not os.path.exists(local_path):
        print(f"Downloading {filename} from Hugging Face Hub...")
        hf_hub_download(
            repo_id=REPO_ID,
            filename=filename,
            repo_type="dataset",
            local_dir=DATA_DIR,
            local_dir_use_symlinks=False
        )
    else:
        print(f"{filename} already exists locally.")
    return local_path


def load_recipes_csv() -> str:
    return download_file("recipes.csv")


def load_ingredient_map() -> dict:
    path = download_file("ingredient_map.data")
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)
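Both loaders go through download_file, so the first call fetches from the Hub into recipe_recommendation/data/ and later calls reuse the local copy. Typical usage:

# First call downloads from the Hub; subsequent calls hit the local cache.
import pandas as pd
from recipe_recommendation.src.io import load_recipes_csv, load_ingredient_map

recipes = pd.read_csv(load_recipes_csv())   # load_recipes_csv returns a local file path
ingredient_map = load_ingredient_map()      # parsed JSON dict with "parents"/"children"
print(len(recipes), len(ingredient_map["parents"]))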
recipe_recommendation/src/trainmodel.py
ADDED
@@ -0,0 +1,237 @@
import os
import joblib
import warnings
import numpy as np
import pandas as pd
from typing import List, Tuple, Sequence, Optional
from xgboost import XGBRanker
from sklearn.model_selection import train_test_split
from sklearn.metrics import ndcg_score
from pandas.api.types import is_numeric_dtype


# ----------------------------- Helpers -----------------------------
def _pick_feature_cols(df: pd.DataFrame, drop_cols: Sequence[str]) -> List[str]:
    """
    Pick numeric feature columns robustly, excluding drop_cols.
    Uses pandas is_numeric_dtype to correctly include nullable ints/floats/bools.
    """
    cols = []
    for c in df.columns:
        if c in drop_cols:
            continue
        if is_numeric_dtype(df[c]):
            cols.append(c)
    return cols


def _sort_and_pack_by_qid(
    X: pd.DataFrame, y: pd.Series, qid: pd.Series, feature_cols: List[str]
) -> Tuple[pd.DataFrame, np.ndarray, List[int], np.ndarray]:
    """
    Sort rows by qid so that group sizes match the sample order.
    Returns:
        X_sorted, y_sorted, groups, qid_sorted (aligned with X_sorted/y_sorted)
    """
    packed = X.copy()
    packed["_label"] = y.values
    packed["_qid"] = qid.values
    packed = packed.sort_values("_qid").reset_index(drop=True)

    groups = packed.groupby("_qid").size().tolist()
    X_sorted = packed[feature_cols].copy()
    y_sorted = packed["_label"].astype(float).values
    qid_sorted = packed["_qid"].values
    return X_sorted, y_sorted, groups, qid_sorted


def _eval_mean_ndcg(
    model: XGBRanker,
    X_val: pd.DataFrame,
    y_val,    # can be np.ndarray or pd.Series
    qid_val,  # aligned with X_val/y_val
    ks: Sequence[int] = (5, 10),
) -> dict:
    """
    Compute mean NDCG@k for each k in ks over validation queries.
    Accepts numpy arrays or pandas Series.
    """
    # Try to respect early-stopping best iteration if available (xgboost>=2.0)
    try:
        preds = model.predict(X_val, iteration_range=(0, model.best_iteration + 1))
    except Exception:
        preds = model.predict(X_val)

    y_arr = np.asarray(y_val)
    q_arr = np.asarray(qid_val)

    out = {}
    for k in ks:
        ndcgs = []
        for q in np.unique(q_arr):
            mask = (q_arr == q)
            if mask.sum() < 2:
                continue
            ndcgs.append(ndcg_score([y_arr[mask]], [preds[mask]], k=k))
        out[f"NDCG@{k}"] = float(np.mean(ndcgs)) if ndcgs else 0.0
    return out


# ----------------------------- Main Trainer -----------------------------
def train_model_ranker(
    user_id: str = "user_1",
    features_path: Optional[str] = None,
    save_model: bool = True,
    model_params: Optional[dict] = None,
    val_ratio: float = 0.2,
    random_state: int = 42,
    max_rows: Optional[int] = None,
):
    """
    Train an XGBoost Learning-to-Rank model (XGBRanker) on cold-start generated data.

    Expected input CSV (from cold_start.py):
      - qid: query id (one round of pantry sampling = one query)
      - relevance: graded relevance label (e.g., 3/2/1/0)
      - features: numeric columns produced by build_features (and any extra numeric signals)

    The function:
      1) Reads the CSV
      2) Selects numeric feature columns robustly
      3) Splits train/val by qid to avoid leakage
      4) Sorts each split by qid and builds group sizes aligned to sample order
      5) Trains XGBRanker and reports NDCG@5/10
      6) Saves model to user_data/<user_id>/ranker.pkl
    """
    base_dir = os.path.join("user_data", user_id)
    os.makedirs(base_dir, exist_ok=True)

    # Resolve features path
    if features_path is None:
        features_path = os.path.join(base_dir, "user_features_rank.csv")
    if not os.path.exists(features_path):
        raise FileNotFoundError(
            f"[train_model_ranker] Cold-start features not found at: {features_path}\n"
            f"Please run cold_start_ranker(user_id='{user_id}') first."
        )

    # Load data
    df = pd.read_csv(features_path)
    if max_rows is not None and len(df) > max_rows:
        df = df.sample(max_rows, random_state=random_state).reset_index(drop=True)

    # Basic validation
    if "qid" not in df.columns or "relevance" not in df.columns:
        raise ValueError("Input CSV must contain 'qid' and 'relevance' columns.")

    # Fill NaNs in label/qid (should not happen, but defensive)
    df["qid"] = pd.to_numeric(df["qid"], errors="coerce").fillna(-1).astype(int)
    df["relevance"] = pd.to_numeric(df["relevance"], errors="coerce").fillna(0).astype(float)

    # Pick numeric feature columns robustly
    drop_cols = {"qid", "relevance"}
    feature_cols = _pick_feature_cols(df, drop_cols)
    if not feature_cols:
        raise ValueError("No numeric feature columns found in dataset.")

    # Ensure numeric + finite values only (replace inf/nan with 0)
    df[feature_cols] = df[feature_cols].apply(pd.to_numeric, errors="coerce")
    df[feature_cols] = df[feature_cols].replace([np.inf, -np.inf], np.nan).fillna(0.0)

    # Split by qid to avoid leakage across queries
    unique_qids = df["qid"].unique()
    if len(unique_qids) < 2:
        warnings.warn("Only one unique qid found — ranking training may be ineffective.")
    train_qids, val_qids = train_test_split(
        unique_qids, test_size=val_ratio, random_state=random_state
    )
    train_mask = df["qid"].isin(train_qids)
    val_mask = df["qid"].isin(val_qids)

    # Split dataframes
    X_train_raw = df.loc[train_mask, feature_cols]
    y_train_raw = df.loc[train_mask, "relevance"]
    qid_train = df.loc[train_mask, "qid"]

    X_val_raw = df.loc[val_mask, feature_cols]
    y_val_raw = df.loc[val_mask, "relevance"]
    qid_val = df.loc[val_mask, "qid"]

    # Sort by qid and build group sizes aligned with sample order (CRITICAL for XGBRanker)
    X_train, y_train, group_train, _ = _sort_and_pack_by_qid(
        X_train_raw, y_train_raw, qid_train, feature_cols
    )
    X_val, y_val, group_val, qid_val_sorted = _sort_and_pack_by_qid(
        X_val_raw, y_val_raw, qid_val, feature_cols
    )

    print(f"[ranker] #Train groups: {len(group_train)} | #Val groups: {len(group_val)}")
    print(f"[ranker] Train rows: {len(X_train)} | Val rows: {len(X_val)} | #Features: {len(feature_cols)}")

    # Default model params
    default_params = dict(
        objective="rank:ndcg",
        eval_metric="ndcg",
        n_estimators=400,
        learning_rate=0.08,
        max_depth=6,
        subsample=0.8,
        colsample_bytree=0.8,
        random_state=random_state,
        tree_method="hist",
        reg_lambda=1.0,
        reg_alpha=0.0,
    )
    if model_params:
        default_params.update(model_params)

    model = XGBRanker(**default_params)

    # Fit model (XGBRanker requires group sizes for the eval_set as well)
    fit_kwargs = dict(
        X=X_train,
        y=y_train,
        group=group_train,
        eval_set=[(X_val, y_val)],
        eval_group=[group_val],
        verbose=False,
    )

    try:
        # Some newer xgboost builds support early_stopping_rounds on XGBRanker.fit
        model.fit(early_stopping_rounds=50, **fit_kwargs)  # maximize is inferred from 'ndcg'
    except TypeError:
        # Fallback to callback API (older versions)
        try:
            from xgboost.callback import EarlyStopping
            model.fit(callbacks=[EarlyStopping(rounds=50, save_best=True, maximize=True)], **fit_kwargs)
        except Exception:
            # Last resort: train without early stopping
            model.fit(**fit_kwargs)

    # Evaluate mean NDCG@5/10
    metrics = _eval_mean_ndcg(model, X_val, y_val, qid_val_sorted, ks=(5, 10))
    print("[ranker] Validation metrics:", " ".join(f"{k}={v:.4f}" for k, v in metrics.items()))

    # Save model
    if save_model:
        model_path = os.path.join(base_dir, "ranker.pkl")
        joblib.dump(model, model_path)
        print(f"[ranker] Model saved to {model_path}")

    return model, metrics, feature_cols


if __name__ == "__main__":
    # Example run
    train_model_ranker(
        user_id="user_1",
        save_model=True,
        val_ratio=0.2,
        random_state=42,
        max_rows=None,      # or set an upper bound for quick iterations, e.g., 200_000
        model_params=None,  # override defaults if desired
    )
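The sort-then-group step matters because XGBRanker's group argument is a list of contiguous block sizes, not per-row query ids, so rows must be ordered by qid before fitting. A tiny synthetic example of that contract (random features, two five-row queries):

# Synthetic two-query example of the group contract XGBRanker expects.
import numpy as np
import pandas as pd
from xgboost import XGBRanker

rng = np.random.default_rng(42)
X = pd.DataFrame(rng.normal(size=(10, 4)), columns=list("abcd"))
y = rng.integers(0, 4, size=10)          # graded relevance 0..3
# Rows are already contiguous by query: first 5 belong to qid 0, last 5 to qid 1.

model = XGBRanker(objective="rank:ndcg", n_estimators=10)
model.fit(X, y, group=[5, 5])            # group sizes must sum to len(X)
print(model.predict(X)[:5])              # ranking scores for the first query's rows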
recipe_recommendation/user_data/demo_user_1/user_profile.json
ADDED
@@ -0,0 +1,28 @@
{
  "user_id": "demo_user_1",
  "num_feedback": 0,
  "diet": {
    "vegetarian_type": "flexible"
  },
  "allergies": [],
  "region_preference": [
    "North America"
  ],
  "nutritional_goals": {
    "calories": {
      "min": 200,
      "max": 800
    },
    "protein": {
      "min": 20,
      "max": 100
    }
  },
  "other_preferences": {
    "preferred_main": [
      "chicken"
    ],
    "disliked_main": [],
    "cooking_time_max": 30
  }
}
recipe_recommendation/user_data/user_0/feature_order.json
ADDED
@@ -0,0 +1,22 @@
[
  "main_match_ratio",
  "other_match_ratio",
  "staple_match_ratio",
  "missing_main_count",
  "missing_other_count",
  "missing_staple_count",
  "calories",
  "protein",
  "fat",
  "protein_ratio",
  "fat_ratio",
  "region_match",
  "is_vegan_safe",
  "is_vegetarian_safe_absolute",
  "is_flexible_safe_absolute",
  "is_user_diet_safe",
  "low_calorie_penalty",
  "preferred_main_overlap",
  "preferred_course_overlap",
  "within_cooking_time"
]
recipe_recommendation/user_data/user_0/feedback.csv
ADDED
@@ -0,0 +1,2 @@
main_match_ratio,other_match_ratio,staple_match_ratio,missing_main_count,missing_other_count,missing_staple_count,calories,protein,fat,protein_ratio,fat_ratio,region_match,is_vegan_safe,is_vegetarian_safe_absolute,is_flexible_safe_absolute,is_user_diet_safe,low_calorie_penalty,preferred_main_overlap,preferred_course_overlap,within_cooking_time,recipe_id,qid,relevance
0.0,0.0,0.0,1,3,1,123.9,0,0,0.0,0.0,0,0,0,0,1,1,0,0,1,73148,0,5
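Each feedback row reuses the columns from feature_order.json and appends recipe_id, qid, and a user-assigned relevance grade. A hedged sketch of appending one row in this format (append_feedback is illustrative, not a helper from this repo):

# Illustrative only: append one feedback row in the feedback.csv format.
import json, os
import pandas as pd

def append_feedback(user_dir, feats: dict, recipe_id: int, qid: int, relevance: int):
    with open(os.path.join(user_dir, "feature_order.json")) as f:
        cols = json.load(f)                  # fixed feature column order
    row = {c: feats.get(c, 0) for c in cols}
    row.update(recipe_id=recipe_id, qid=qid, relevance=relevance)
    path = os.path.join(user_dir, "feedback.csv")
    pd.DataFrame([row]).to_csv(path, mode="a",
                               header=not os.path.exists(path), index=False)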
recipe_recommendation/user_data/user_0/qid.txt
ADDED
@@ -0,0 +1 @@
0
recipe_recommendation/user_data/user_0/ranker.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:72a3361c05b69d3627a69983ee1460730b304b1a4c562be6fc75001ef9bd887f
size 1598006
recipe_recommendation/user_data/user_0/user_features_rank.csv
ADDED
The diff for this file is too large to render. See raw diff
recipe_recommendation/user_data/user_0/user_profile.json
ADDED
@@ -0,0 +1,26 @@
{
  "user_id": "user_0",
  "num_feedback": 0,
  "diet": {
    "vegetarian_type": "non_vegetarian"
  },
  "allergies": [],
  "region_preference": [
    "Asia"
  ],
  "nutritional_goals": {
    "calories": {
      "min": 250,
      "max": 4000
    },
    "protein": {
      "min": 20,
      "max": 160
    }
  },
  "other_preferences": {
    "preferred_main": [],
    "disliked_main": [],
    "cooking_time_max": 180
  }
}
recipe_recommendation/user_data/user_1/feature_order.json
ADDED
@@ -0,0 +1,22 @@
[
  "main_match_ratio",
  "other_match_ratio",
  "staple_match_ratio",
  "missing_main_count",
  "missing_other_count",
  "missing_staple_count",
  "calories",
  "protein",
  "fat",
  "protein_ratio",
  "fat_ratio",
  "region_match",
  "is_vegan_safe",
  "is_vegetarian_safe_absolute",
  "is_flexible_safe_absolute",
  "is_user_diet_safe",
  "low_calorie_penalty",
  "preferred_main_overlap",
  "preferred_course_overlap",
  "within_cooking_time"
]
recipe_recommendation/user_data/user_1/feedback.csv
ADDED
@@ -0,0 +1,3 @@
main_match_ratio,other_match_ratio,staple_match_ratio,missing_main_count,missing_other_count,missing_staple_count,calories,protein,fat,protein_ratio,fat_ratio,region_match,is_vegan_safe,is_vegetarian_safe_absolute,is_flexible_safe_absolute,is_user_diet_safe,low_calorie_penalty,preferred_main_overlap,preferred_course_overlap,within_cooking_time,recipe_id,qid,relevance
0.0,0.0,0.0,1,3,1,320.2,0,0,0.0,0.0,0,0,0,0,0,1,1,0,1,44939,0,5
0.0,0.0,0.0,1,3,1,123.9,0,0,0.0,0.0,0,0,0,0,0,1,0,0,1,73148,1,5
recipe_recommendation/user_data/user_1/qid.txt
ADDED
@@ -0,0 +1 @@
2
recipe_recommendation/user_data/user_1/ranker.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c8f305ad668b45ca0bb8d6f6cb1b87ca68d26a5c495622d2df4ac38e546b2787
size 1638981
recipe_recommendation/user_data/user_1/user_features_rank.csv
ADDED
The diff for this file is too large to render. See raw diff
recipe_recommendation/user_data/user_1/user_profile.json
ADDED
@@ -0,0 +1,26 @@
{
  "user_id": "user_1",
  "num_feedback": 0,
  "diet": {
    "vegetarian_type": "flexible"
  },
  "allergies": [],
  "region_preference": [
    "North America"
  ],
  "nutritional_goals": {
    "calories": {
      "min": 250,
      "max": 2000
    },
    "protein": {
      "min": 50,
      "max": 160
    }
  },
  "other_preferences": {
    "preferred_main": [],
    "disliked_main": [],
    "cooking_time_max": 45
  }
}
recipe_recommendation/user_data/user_2/feature_order.json
ADDED
@@ -0,0 +1,22 @@
[
  "main_match_ratio",
  "other_match_ratio",
  "staple_match_ratio",
  "missing_main_count",
  "missing_other_count",
  "missing_staple_count",
  "calories",
  "protein",
  "fat",
  "protein_ratio",
  "fat_ratio",
  "region_match",
  "is_vegan_safe",
  "is_vegetarian_safe_absolute",
  "is_flexible_safe_absolute",
  "is_user_diet_safe",
  "low_calorie_penalty",
  "preferred_main_overlap",
  "preferred_course_overlap",
  "within_cooking_time"
]
recipe_recommendation/user_data/user_2/feedback.csv
ADDED
@@ -0,0 +1,2 @@
main_match_ratio,other_match_ratio,staple_match_ratio,missing_main_count,missing_other_count,missing_staple_count,calories,protein,fat,protein_ratio,fat_ratio,region_match,is_vegan_safe,is_vegetarian_safe_absolute,is_flexible_safe_absolute,is_user_diet_safe,low_calorie_penalty,preferred_main_overlap,preferred_course_overlap,within_cooking_time,recipe_id,qid,relevance
0.0,0.0,0.0,1,2,1,1640.1,0,0,0.0,0.0,0,0,0,0,1,1,0,0,1,106901,0,5