vk committed on
Commit
28db69a
·
1 Parent(s): 5c5dfcc

Integrate USDA FoodData Central API for intelligent food understanding

Browse files

- Replace hardcoded ingredient detection with USDA API calls
- Use government food database to understand 'burger' -> 'ground beef patty' etc
- Combine USDA suggestions with DialoGPT and original query
- Truly intelligent food term recognition without any hardcoding
- Free unlimited API usage from USDA

Files changed (2) hide show
  1. app.py +69 -26
  2. requirements.txt +3 -1
app.py CHANGED
@@ -13,6 +13,9 @@ from sklearn.metrics.pairwise import cosine_similarity
13
  from sklearn.feature_extraction.text import TfidfVectorizer
14
  import numpy as np
15
  import urllib.request
 
 
 
16
 
17
  # Initialize FastAPI app
18
  app = FastAPI(
@@ -248,29 +251,73 @@ def load_recipes():
248
  print(f"📍 Error details: {type(e).__name__}: {str(e)}")
249
  raise Exception(f"Failed to load recipe database: {e}")
250
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
251
  @torch.inference_mode()
252
- def extract_query_features_with_llm(query_text, preferences="", max_minutes=30):
253
- """Use DialoGPT to enhance query understanding, then pass full query to search"""
254
  global tokenizer, model
255
 
256
  full_query = f"{query_text} {preferences}".strip()
257
 
258
- # Start with the original query as our search terms
259
  base_search_terms = [full_query]
260
 
261
- # If DialoGPT is available, use it to enhance understanding
262
- enhanced_terms = []
 
 
 
263
  if model is not None and tokenizer is not None:
264
  try:
265
- # Use DialoGPT to understand context and intent
266
- conversation = f"User: I want to cook {full_query}".strip()
267
 
268
  inputs = tokenizer.encode(conversation + tokenizer.eos_token, return_tensors="pt").to(device)
269
 
270
- # Generate a response to understand intent
271
  outputs = model.generate(
272
  inputs,
273
- max_new_tokens=30,
274
  temperature=0.7,
275
  top_p=0.9,
276
  do_sample=True,
@@ -280,32 +327,28 @@ def extract_query_features_with_llm(query_text, preferences="", max_minutes=30):
280
 
281
  response = tokenizer.decode(outputs[0][inputs.shape[1]:], skip_special_tokens=True)
282
 
283
- # Extract key food-related words from DialoGPT response
284
- response_lower = response.lower()
285
- food_keywords = []
286
-
287
- # Look for food-related words in the response
288
- food_indicators = ['recipe', 'cook', 'make', 'dish', 'meal', 'food', 'ingredient', 'cuisine']
289
  for word in response.split():
290
  word_clean = word.lower().strip('.,!?')
291
- if word_clean in food_indicators or len(word_clean) > 3: # Capture potential food words
292
- food_keywords.append(word_clean)
293
-
294
- enhanced_terms = food_keywords[:5] # Limit to top 5 terms
295
 
296
- print(f"🤖 DialoGPT enhanced with: {enhanced_terms}")
297
 
298
  except Exception as e:
299
- print(f"⚠️ DialoGPT enhancement failed: {e}")
 
 
 
300
 
301
- # Combine original query with enhanced terms
302
- all_search_terms = base_search_terms + enhanced_terms
303
 
304
  return {
305
  'original_query': full_query,
306
  'search_terms': all_search_terms,
307
  'max_minutes': max_minutes,
308
- 'enhanced_by_llm': len(enhanced_terms) > 0
 
309
  }
310
 
311
 
@@ -525,8 +568,8 @@ async def get_recipe_suggestions(request: RecipeRequest):
525
 
526
  print(f"📥 Recipe request: {request.ingredients}, prefs: {request.preferences}, time: {request.max_minutes}")
527
 
528
- # Use LLM for intelligent feature extraction
529
- query_features = extract_query_features_with_llm(
530
  request.ingredients,
531
  request.preferences,
532
  request.max_minutes
 
13
  from sklearn.feature_extraction.text import TfidfVectorizer
14
  import numpy as np
15
  import urllib.request
16
+ import requests
17
+ import asyncio
18
+ import aiohttp
19
 
20
  # Initialize FastAPI app
21
  app = FastAPI(
 
251
  print(f"📍 Error details: {type(e).__name__}: {str(e)}")
252
  raise Exception(f"Failed to load recipe database: {e}")
253
 
254
async def get_usda_food_suggestions(query_text, limit=5, timeout_seconds=10):
    """Look up *query_text* in the USDA FoodData Central search API.

    Strips obvious filler words from the query, searches the Foundation and
    SR Legacy datasets, and returns up to three lowercased food descriptions
    (e.g. 'burger' -> descriptions like 'ground beef patty').  Any failure —
    network error, bad status, parse error — degrades to an empty list so the
    caller can fall back to the raw query.

    Args:
        query_text: Free-text user query (e.g. "I want a burger recipe").
        limit: Page size requested from the API (default 5).
        timeout_seconds: Total HTTP timeout so a hung request cannot stall
            the whole recipe endpoint (default 10).

    Returns:
        list[str]: Up to 3 lowercased USDA food descriptions, or [].
    """
    import os  # local import: only needed for the optional API-key override

    try:
        # Drop filler words so the API sees only candidate food terms.
        stop_words = {'i', 'want', 'recipe', 'recipes', 'for', 'the', 'a', 'an'}
        food_words = [word for word in query_text.lower().split()
                      if word not in stop_words]

        if not food_words:
            return []

        # Use first 2 meaningful words as the search term.
        search_term = ' '.join(food_words[:2])

        url = "https://api.nal.usda.gov/fdc/v1/foods/search"
        params = {
            'query': search_term,
            # BUGFIX: aiohttp/yarl rejects list-valued query params
            # ("Invalid variable type"), which made every request raise and
            # fall into the except below.  The USDA API accepts multiple
            # dataType values as a comma-separated string.
            'dataType': 'Foundation,SR Legacy',  # most comprehensive data
            'pageSize': limit,
            # DEMO_KEY works for light testing; allow a real key via env.
            'api_key': os.environ.get('USDA_API_KEY', 'DEMO_KEY'),
        }

        timeout = aiohttp.ClientTimeout(total=timeout_seconds)
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.get(url, params=params) as response:
                if response.status != 200:
                    print(f"⚠️ USDA API error: {response.status}")
                    return []

                data = await response.json()

                # Keep every non-empty description, lowercased, in API order.
                food_suggestions = [
                    food.get('description', '').lower()
                    for food in data.get('foods', [])
                    if food.get('description', '')
                ]

                print(f"🥗 USDA found: {food_suggestions[:3]}")
                return food_suggestions[:3]  # top 3 matches

    except Exception as e:
        # Best effort: suggestions are an enhancement, never a hard failure.
        print(f"⚠️ USDA API failed: {e}")
        return []
297
  @torch.inference_mode()
298
+ async def extract_query_features_with_llm(query_text, preferences="", max_minutes=30):
299
+ """Use USDA API + DialoGPT for truly intelligent food understanding"""
300
  global tokenizer, model
301
 
302
  full_query = f"{query_text} {preferences}".strip()
303
 
304
+ # Start with the original query
305
  base_search_terms = [full_query]
306
 
307
+ # Get intelligent food suggestions from USDA
308
+ usda_suggestions = await get_usda_food_suggestions(query_text)
309
+
310
+ # If DialoGPT is available, use it for context enhancement
311
+ llm_enhanced_terms = []
312
  if model is not None and tokenizer is not None:
313
  try:
314
+ conversation = f"User: I want to cook {query_text}".strip()
 
315
 
316
  inputs = tokenizer.encode(conversation + tokenizer.eos_token, return_tensors="pt").to(device)
317
 
 
318
  outputs = model.generate(
319
  inputs,
320
+ max_new_tokens=20,
321
  temperature=0.7,
322
  top_p=0.9,
323
  do_sample=True,
 
327
 
328
  response = tokenizer.decode(outputs[0][inputs.shape[1]:], skip_special_tokens=True)
329
 
330
+ # Only extract actual food/cooking terms
 
 
 
 
 
331
  for word in response.split():
332
  word_clean = word.lower().strip('.,!?')
333
+ if len(word_clean) > 3 and word_clean not in ['that', 'have', 'with', 'this', 'your', 'they', 'them']:
334
+ llm_enhanced_terms.append(word_clean)
 
 
335
 
336
+ llm_enhanced_terms = llm_enhanced_terms[:2] # Limit to 2 terms
337
 
338
  except Exception as e:
339
+ print(f"⚠️ DialoGPT failed: {e}")
340
+
341
+ # Combine all intelligent suggestions
342
+ all_search_terms = base_search_terms + usda_suggestions + llm_enhanced_terms
343
 
344
+ print(f"🧠 Smart search terms: {all_search_terms[:5]}")
 
345
 
346
  return {
347
  'original_query': full_query,
348
  'search_terms': all_search_terms,
349
  'max_minutes': max_minutes,
350
+ 'usda_enhanced': len(usda_suggestions) > 0,
351
+ 'llm_enhanced': len(llm_enhanced_terms) > 0
352
  }
353
 
354
 
 
568
 
569
  print(f"📥 Recipe request: {request.ingredients}, prefs: {request.preferences}, time: {request.max_minutes}")
570
 
571
+ # Use USDA API + LLM for intelligent feature extraction
572
+ query_features = await extract_query_features_with_llm(
573
  request.ingredients,
574
  request.preferences,
575
  request.max_minutes
requirements.txt CHANGED
@@ -9,4 +9,6 @@ safetensors>=0.4.0
9
  pandas>=2.0.0
10
  scikit-learn>=1.3.0
11
  numpy>=1.24.0
12
- datasets>=2.19.0
 
 
 
9
  pandas>=2.0.0
10
  scikit-learn>=1.3.0
11
  numpy>=1.24.0
12
+ datasets>=2.19.0
13
+ aiohttp>=3.8.0
14
+ requests>=2.25.0