Spaces:
Sleeping
Sleeping
vk committed on
Commit ·
28db69a
1
Parent(s): 5c5dfcc
Integrate USDA FoodData Central API for intelligent food understanding
Browse files
- Replace hardcoded ingredient detection with USDA API calls
- Use government food database to understand 'burger' -> 'ground beef patty' etc
- Combine USDA suggestions with DialoGPT and original query
- Truly intelligent food term recognition without any hardcoding
- Free unlimited API usage from USDA
- app.py +69 -26
- requirements.txt +3 -1
app.py
CHANGED
|
@@ -13,6 +13,9 @@ from sklearn.metrics.pairwise import cosine_similarity
|
|
| 13 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 14 |
import numpy as np
|
| 15 |
import urllib.request
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
# Initialize FastAPI app
|
| 18 |
app = FastAPI(
|
|
@@ -248,29 +251,73 @@ def load_recipes():
|
|
| 248 |
print(f"📍 Error details: {type(e).__name__}: {str(e)}")
|
| 249 |
raise Exception(f"Failed to load recipe database: {e}")
|
| 250 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 251 |
@torch.inference_mode()
|
| 252 |
-
def extract_query_features_with_llm(query_text, preferences="", max_minutes=30):
|
| 253 |
-
"""Use
|
| 254 |
global tokenizer, model
|
| 255 |
|
| 256 |
full_query = f"{query_text} {preferences}".strip()
|
| 257 |
|
| 258 |
-
# Start with the original query
|
| 259 |
base_search_terms = [full_query]
|
| 260 |
|
| 261 |
-
#
|
| 262 |
-
|
|
|
|
|
|
|
|
|
|
| 263 |
if model is not None and tokenizer is not None:
|
| 264 |
try:
|
| 265 |
-
|
| 266 |
-
conversation = f"User: I want to cook {full_query}".strip()
|
| 267 |
|
| 268 |
inputs = tokenizer.encode(conversation + tokenizer.eos_token, return_tensors="pt").to(device)
|
| 269 |
|
| 270 |
-
# Generate a response to understand intent
|
| 271 |
outputs = model.generate(
|
| 272 |
inputs,
|
| 273 |
-
max_new_tokens=
|
| 274 |
temperature=0.7,
|
| 275 |
top_p=0.9,
|
| 276 |
do_sample=True,
|
|
@@ -280,32 +327,28 @@ def extract_query_features_with_llm(query_text, preferences="", max_minutes=30):
|
|
| 280 |
|
| 281 |
response = tokenizer.decode(outputs[0][inputs.shape[1]:], skip_special_tokens=True)
|
| 282 |
|
| 283 |
-
#
|
| 284 |
-
response_lower = response.lower()
|
| 285 |
-
food_keywords = []
|
| 286 |
-
|
| 287 |
-
# Look for food-related words in the response
|
| 288 |
-
food_indicators = ['recipe', 'cook', 'make', 'dish', 'meal', 'food', 'ingredient', 'cuisine']
|
| 289 |
for word in response.split():
|
| 290 |
word_clean = word.lower().strip('.,!?')
|
| 291 |
-
if
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
enhanced_terms = food_keywords[:5] # Limit to top 5 terms
|
| 295 |
|
| 296 |
-
|
| 297 |
|
| 298 |
except Exception as e:
|
| 299 |
-
print(f"⚠️ DialoGPT
|
|
|
|
|
|
|
|
|
|
| 300 |
|
| 301 |
-
|
| 302 |
-
all_search_terms = base_search_terms + enhanced_terms
|
| 303 |
|
| 304 |
return {
|
| 305 |
'original_query': full_query,
|
| 306 |
'search_terms': all_search_terms,
|
| 307 |
'max_minutes': max_minutes,
|
| 308 |
-
'
|
|
|
|
| 309 |
}
|
| 310 |
|
| 311 |
|
|
@@ -525,8 +568,8 @@ async def get_recipe_suggestions(request: RecipeRequest):
|
|
| 525 |
|
| 526 |
print(f"📥 Recipe request: {request.ingredients}, prefs: {request.preferences}, time: {request.max_minutes}")
|
| 527 |
|
| 528 |
-
# Use LLM for intelligent feature extraction
|
| 529 |
-
query_features = extract_query_features_with_llm(
|
| 530 |
request.ingredients,
|
| 531 |
request.preferences,
|
| 532 |
request.max_minutes
|
|
|
|
| 13 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 14 |
import numpy as np
|
| 15 |
import urllib.request
|
| 16 |
+
import requests
|
| 17 |
+
import asyncio
|
| 18 |
+
import aiohttp
|
| 19 |
|
| 20 |
# Initialize FastAPI app
|
| 21 |
app = FastAPI(
|
|
|
|
| 251 |
print(f"📍 Error details: {type(e).__name__}: {str(e)}")
|
| 252 |
raise Exception(f"Failed to load recipe database: {e}")
|
| 253 |
|
| 254 |
+
async def get_usda_food_suggestions(query_text, limit=5):
    """Use USDA FoodData Central API to intelligently understand food terms.

    Args:
        query_text: Free-text user query (e.g. "I want a burger recipe").
        limit: Maximum number of results to request from the API.

    Returns:
        A list of up to 3 lower-cased USDA food descriptions, or an empty
        list on any failure (no meaningful words, non-200 status, network
        error, or timeout).
    """
    try:
        # Drop filler words so the search term is the actual food phrase
        stopwords = {'i', 'want', 'recipe', 'recipes', 'for', 'the', 'a', 'an'}
        food_words = [word for word in query_text.lower().split()
                      if word not in stopwords]

        if not food_words:
            return []

        # Search USDA database for food items
        search_term = ' '.join(food_words[:2])  # Use first 2 meaningful words

        url = "https://api.nal.usda.gov/fdc/v1/foods/search"
        # BUG FIX: a list value ('dataType': [...]) in an aiohttp params
        # mapping raises TypeError in yarl before the request is sent; the
        # broad except below swallowed it, so every USDA lookup silently
        # returned []. The FDC search endpoint accepts a comma-separated
        # dataType string in GET queries. pageSize is stringified for the
        # same reason (older yarl versions reject non-str values).
        params = {
            'query': search_term,
            'dataType': 'Foundation,SR Legacy',  # Most comprehensive data
            'pageSize': str(limit),
            'api_key': 'DEMO_KEY'  # Free demo key, works for testing
                                   # NOTE(review): DEMO_KEY is heavily
                                   # rate-limited — consider an env var.
        }

        # Bound the request so a stalled API cannot hang the async caller
        timeout = aiohttp.ClientTimeout(total=10)
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.get(url, params=params) as response:
                if response.status == 200:
                    data = await response.json()

                    # Extract meaningful food terms from USDA descriptions
                    food_suggestions = []
                    for food in data.get('foods', []):
                        description = food.get('description', '').lower()
                        if description:
                            food_suggestions.append(description)

                    print(f"🥗 USDA found: {food_suggestions[:3]}")
                    return food_suggestions[:3]  # Return top 3 matches
                else:
                    print(f"⚠️ USDA API error: {response.status}")
                    return []

    except Exception as e:
        # Best-effort enrichment: never let a USDA failure break the caller
        print(f"⚠️ USDA API failed: {e}")
        return []
|
| 296 |
+
|
| 297 |
@torch.inference_mode()
|
| 298 |
+
async def extract_query_features_with_llm(query_text, preferences="", max_minutes=30):
|
| 299 |
+
"""Use USDA API + DialoGPT for truly intelligent food understanding"""
|
| 300 |
global tokenizer, model
|
| 301 |
|
| 302 |
full_query = f"{query_text} {preferences}".strip()
|
| 303 |
|
| 304 |
+
# Start with the original query
|
| 305 |
base_search_terms = [full_query]
|
| 306 |
|
| 307 |
+
# Get intelligent food suggestions from USDA
|
| 308 |
+
usda_suggestions = await get_usda_food_suggestions(query_text)
|
| 309 |
+
|
| 310 |
+
# If DialoGPT is available, use it for context enhancement
|
| 311 |
+
llm_enhanced_terms = []
|
| 312 |
if model is not None and tokenizer is not None:
|
| 313 |
try:
|
| 314 |
+
conversation = f"User: I want to cook {query_text}".strip()
|
|
|
|
| 315 |
|
| 316 |
inputs = tokenizer.encode(conversation + tokenizer.eos_token, return_tensors="pt").to(device)
|
| 317 |
|
|
|
|
| 318 |
outputs = model.generate(
|
| 319 |
inputs,
|
| 320 |
+
max_new_tokens=20,
|
| 321 |
temperature=0.7,
|
| 322 |
top_p=0.9,
|
| 323 |
do_sample=True,
|
|
|
|
| 327 |
|
| 328 |
response = tokenizer.decode(outputs[0][inputs.shape[1]:], skip_special_tokens=True)
|
| 329 |
|
| 330 |
+
# Only extract actual food/cooking terms
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 331 |
for word in response.split():
|
| 332 |
word_clean = word.lower().strip('.,!?')
|
| 333 |
+
if len(word_clean) > 3 and word_clean not in ['that', 'have', 'with', 'this', 'your', 'they', 'them']:
|
| 334 |
+
llm_enhanced_terms.append(word_clean)
|
|
|
|
|
|
|
| 335 |
|
| 336 |
+
llm_enhanced_terms = llm_enhanced_terms[:2] # Limit to 2 terms
|
| 337 |
|
| 338 |
except Exception as e:
|
| 339 |
+
print(f"⚠️ DialoGPT failed: {e}")
|
| 340 |
+
|
| 341 |
+
# Combine all intelligent suggestions
|
| 342 |
+
all_search_terms = base_search_terms + usda_suggestions + llm_enhanced_terms
|
| 343 |
|
| 344 |
+
print(f"🧠 Smart search terms: {all_search_terms[:5]}")
|
|
|
|
| 345 |
|
| 346 |
return {
|
| 347 |
'original_query': full_query,
|
| 348 |
'search_terms': all_search_terms,
|
| 349 |
'max_minutes': max_minutes,
|
| 350 |
+
'usda_enhanced': len(usda_suggestions) > 0,
|
| 351 |
+
'llm_enhanced': len(llm_enhanced_terms) > 0
|
| 352 |
}
|
| 353 |
|
| 354 |
|
|
|
|
| 568 |
|
| 569 |
print(f"📥 Recipe request: {request.ingredients}, prefs: {request.preferences}, time: {request.max_minutes}")
|
| 570 |
|
| 571 |
+
# Use USDA API + LLM for intelligent feature extraction
|
| 572 |
+
query_features = await extract_query_features_with_llm(
|
| 573 |
request.ingredients,
|
| 574 |
request.preferences,
|
| 575 |
request.max_minutes
|
requirements.txt
CHANGED
|
@@ -9,4 +9,6 @@ safetensors>=0.4.0
|
|
| 9 |
pandas>=2.0.0
|
| 10 |
scikit-learn>=1.3.0
|
| 11 |
numpy>=1.24.0
|
| 12 |
-
datasets>=2.19.0
|
|
|
|
|
|
|
|
|
| 9 |
pandas>=2.0.0
|
| 10 |
scikit-learn>=1.3.0
|
| 11 |
numpy>=1.24.0
|
| 12 |
+
datasets>=2.19.0
|
| 13 |
+
aiohttp>=3.8.0
|
| 14 |
+
requests>=2.25.0
|