Prathamesh Sable committed on
Commit
59ab782
·
1 Parent(s): fc5a259

Working ingredient analysis; fine-tuning needed

Browse files
data/Food_Aditives_E_numbers.csv CHANGED
@@ -1,4 +1,4 @@
1
- E Numbers,Name of Aditive,Description,Example Use
2
  E100,Curcumin,Naturally occurring orange/yellow colour Extracted from the spice turmeric,"Used in pastries, confectionery, sauces, and soups"
3
  E101,Riboflavin or lactoflavin (Vitamin B2),Naturally occurring B-group vitamin Usually obtained from yeast or produced synthetically,Enrichment and fortification of food Added to processed cheese as yellow/orange colour
4
  E102,Tartrazine,Widely used yellow/orange colour,"Found in soft drinks, cakes, biscuits, puddings, meat products, sauces, tinned and packet convenience foods and confectionery"
 
1
+ E Numbers,Name of Additive,Description,Example Use
2
  E100,Curcumin,Naturally occurring orange/yellow colour Extracted from the spice turmeric,"Used in pastries, confectionery, sauces, and soups"
3
  E101,Riboflavin or lactoflavin (Vitamin B2),Naturally occurring B-group vitamin Usually obtained from yeast or produced synthetically,Enrichment and fortification of food Added to processed cheese as yellow/orange colour
4
  E102,Tartrazine,Widely used yellow/orange colour,"Found in soft drinks, cakes, biscuits, puddings, meat products, sauces, tinned and packet convenience foods and confectionery"
interfaces/productModels.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ from typing import List
2
+ from pydantic import BaseModel
3
+
4
# Request body schema for a product-level ingredient analysis request.
class ProductIngredientsRequest(BaseModel):
    """Request body carrying the ingredients of a single product."""

    # One string per ingredient, as listed on the product.
    ingredients: List[str]
routers/analysis.py CHANGED
@@ -1,13 +1,17 @@
 
1
  from fastapi import APIRouter, Depends, HTTPException
2
  from sqlalchemy.orm import Session
3
  from typing import List, Dict, Any
 
4
  from interfaces.ingredientModels import IngredientAnalysisResult, IngredientRequest
 
5
  from services.auth_service import get_current_user
6
  from logger_manager import log_info, log_error,logger
7
  from db.database import get_db
8
  from db.repositories import IngredientRepository
9
 
10
  from services.ingredientFinderAgent import IngredientInfoAgentLangGraph
 
11
 
12
 
13
  router = APIRouter()
@@ -55,15 +59,59 @@ async def process_ingredient_endpoint(request: IngredientRequest, db: Session =
55
  raise HTTPException(status_code=500, detail="Internal Server Error")
56
 
57
 
58
- # @router.post("/process_ingredients")
59
- # def process_ingredients_endpoint(ingredients: List[str], db: Session = Depends(get_db), current_user: User = Depends(get_current_user)):
60
- # log_info("process_ingredients_endpoint called")
61
- # print(ingredients)
62
- # try:
63
- # # result = process_ingredients(db, ingredients, current_user.id)
64
- # result = None
65
- # log_info("process_ingredients_endpoint completed successfully")
66
- # return result
67
- # except Exception as e:
68
- # log_error(f"Error in process_ingredients_endpoint: {str(e)}")
69
- # raise HTTPException(status_code=500, detail=str(e))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime
2
  from fastapi import APIRouter, Depends, HTTPException
3
  from sqlalchemy.orm import Session
4
  from typing import List, Dict, Any
5
+ from db.models import User
6
  from interfaces.ingredientModels import IngredientAnalysisResult, IngredientRequest
7
+ from interfaces.productModels import ProductIngredientsRequest
8
  from services.auth_service import get_current_user
9
  from logger_manager import log_info, log_error,logger
10
  from db.database import get_db
11
  from db.repositories import IngredientRepository
12
 
13
  from services.ingredientFinderAgent import IngredientInfoAgentLangGraph
14
+ from services.productAnalyzerAgent import analyze_product_ingredients
15
 
16
 
17
  router = APIRouter()
 
59
  raise HTTPException(status_code=500, detail="Internal Server Error")
60
 
61
 
62
@router.post("/process_product_ingredients", response_model=Dict[str, Any])
async def process_ingredients_endpoint(
    product_ingredient: ProductIngredientsRequest,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Analyze every ingredient of a product and return an aggregate report.

    Each ingredient name is looked up in the database first; names that are
    not stored yet are resolved through ``IngredientInfoAgentLangGraph`` and
    persisted. The collected per-ingredient results are then handed to
    ``analyze_product_ingredients`` together with the current user's first
    preference record (if any).

    Args:
        product_ingredient: Request body holding the list of ingredient names.
        db: SQLAlchemy session injected by FastAPI.
        current_user: Authenticated user injected by FastAPI.

    Returns:
        A dict with ``ingredients_count``, ``processed_ingredients``,
        ``overall_analysis``, ``user_id`` and an ISO-formatted ``timestamp``.

    Raises:
        HTTPException: 500 with a generic message when processing fails.
    """
    ingredients = product_ingredient.ingredients
    log_info(f"process_ingredients_endpoint called for {len(ingredients)} ingredients")
    try:
        # Step 1: Process individual ingredients
        ingredient_results = []
        ingredient_finder = IngredientInfoAgentLangGraph()
        repo = IngredientRepository(db)

        for ingredient_name in ingredients:
            log_info(f"Processing ingredient: {ingredient_name}")

            # Check if ingredient exists in database
            db_ingredient = repo.get_ingredient_by_name(ingredient_name)

            if db_ingredient:
                log_info(f"Found existing ingredient in database: {ingredient_name}")
                # NOTE(review): ingredient_db_to_pydantic is not among the imports
                # visible here; presumably defined elsewhere in this module — confirm.
                ingredient_data = ingredient_db_to_pydantic(db_ingredient)
            else:
                # Get from agent if not in database
                log_info(f"Fetching ingredient from agent: {ingredient_name}")
                ingredient_data = ingredient_finder.process_ingredient(ingredient_name)

                # Save to database for future use
                repo.create_ingredient(ingredient_data)
                log_info(f"Saved new ingredient to database: {ingredient_name}")

            ingredient_results.append(ingredient_data)

        # Step 2: Generate aggregate analysis with product analyzer agent
        user_preferences: Dict[str, Any] = {}
        if current_user:
            # Only the first preference record is consulted.
            primary = current_user.preferences[0] if current_user.preferences else None
            user_preferences = {
                "user_id": current_user.id,
                "allergies": primary.allergens if primary is not None else None,
                "dietary_restrictions": (
                    primary.dietary_restrictions if primary is not None else None
                ),
            }

        product_analysis = await analyze_product_ingredients(
            ingredients_data=ingredient_results,
            user_preferences=user_preferences,
        )

        # Step 3: Prepare final response
        result = {
            "ingredients_count": len(ingredients),
            "processed_ingredients": ingredient_results,
            "overall_analysis": product_analysis,
            "user_id": current_user.id if current_user else None,
            "timestamp": datetime.now().isoformat(),
        }

        log_info("process_ingredients_endpoint completed successfully")
        return result

    except HTTPException:
        # Deliberate HTTP errors must reach the client unchanged.
        raise
    except Exception as e:
        log_error(f"Error in process_ingredients_endpoint: {str(e)}")
        # Keep internal error text out of the response body; the details are
        # in the log. Same generic message as the single-ingredient endpoint.
        raise HTTPException(status_code=500, detail="Internal Server Error") from e
services/ingredientFinderAgent.py CHANGED
@@ -36,7 +36,7 @@ def search_local_db(ingredient: str) -> Dict[str, Any]:
36
  """Search local database for ingredient information. E number database scrapped"""
37
  logger.info(f"Searching local DB for: {ingredient}")
38
  if additives_df is not None:
39
- match = additives_df[additives_df['Name of Aditive'].str.contains(ingredient, case=False, na=False)]
40
  if not match.empty:
41
  return {"source": "Local DB", "found": True, "data": match.iloc[0].to_dict()}
42
  return {"source": "Local DB", "found": False, "data": None}
 
36
  """Search local database for ingredient information. E number database scrapped"""
37
  logger.info(f"Searching local DB for: {ingredient}")
38
  if additives_df is not None:
39
+ match = additives_df[additives_df['Name of Additive'].str.contains(ingredient, case=False, na=False, regex=False)]
40
  if not match.empty:
41
  return {"source": "Local DB", "found": True, "data": match.iloc[0].to_dict()}
42
  return {"source": "Local DB", "found": False, "data": None}
services/productAnalyzerAgent.py ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import List, Dict, Any, Optional
3
+ from dotenv import load_dotenv
4
+ from langchain_core.messages import HumanMessage
5
+ from langchain_google_genai import ChatGoogleGenerativeAI
6
+ from logger_manager import logger
7
+ from interfaces.ingredientModels import IngredientAnalysisResult
8
+
9
+ # Load environment variables
10
+ load_dotenv()
11
+
12
async def analyze_product_ingredients(
    ingredients_data: List[IngredientAnalysisResult],
    user_preferences: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
    """
    Analyze multiple ingredients to provide a comprehensive product analysis
    for AR display, considering user preferences and dietary restrictions.

    Args:
        ingredients_data: Per-ingredient analysis results to summarize in the
            prompt.
        user_preferences: Optional mapping; the keys ``"allergies"`` and
            ``"dietary_restrictions"`` are read when present. Missing or
            ``None`` values are rendered as "None specified".

    Returns:
        The JSON object parsed from the LLM reply on success. When the reply
        contains no parseable JSON, a reduced dict with
        ``overall_safety_score``, ``error`` and ``ingredient_count``. When LLM
        setup or the call itself raises, the result of
        ``generate_fallback_analysis``.
    """
    # Function-scope imports: hoisted out of the try block so an import
    # problem is not mistaken for an LLM failure.
    import json
    import re

    logger.info(f"Analyzing product with {len(ingredients_data)} ingredients")

    # Prepare ingredient data for the prompt
    ingredients_summary = []
    for i, ingredient in enumerate(ingredients_data):
        # A missing description must not abort the whole analysis.
        description = getattr(ingredient, 'description', None) or 'Unknown'
        if len(description) > 200:
            description = description[:200] + '...'
        allergic_info = getattr(ingredient, 'allergic_info', None)
        health_effects = getattr(ingredient, 'health_effects', None)

        ingredient_info = f"""
        Ingredient {i+1}: {ingredient.name}
        Safety Rating: {ingredient.safety_rating}/10
        Diet Type: {getattr(ingredient, 'diet_type', 'Unknown')}
        Allergic Info: {', '.join(allergic_info) if allergic_info else 'None known'}
        Health Effects: {', '.join(health_effects) if health_effects else 'Unknown'}
        Description: {description}
        """
        ingredients_summary.append(ingredient_info)

    # Add user preferences context if available
    user_context = ""
    if user_preferences:
        # `or` rather than a .get() default: callers may pass explicit None
        # values, which a .get() default would not replace.
        allergies = user_preferences.get("allergies") or "None specified"
        diet = user_preferences.get("dietary_restrictions") or "None specified"
        user_context = f"""
        User has the following preferences:
        - Dietary Restrictions: {diet}
        - Allergies: {allergies}
        """

    # Create the analysis prompt
    analysis_prompt = f"""
    # PRODUCT INGREDIENT ANALYSIS TASK

    You are an expert food scientist and nutritionist analyzing a product's ingredients.
    Based on the detailed information about each ingredient below, provide a comprehensive
    analysis that would be helpful for a consumer viewing this in an AR application.

    ## INGREDIENTS INFORMATION:
    {''.join(ingredients_summary)}

    {user_context}

    ## REQUIRED ANALYSIS:
    1. Overall Safety Score (1-10): Calculate this based on individual ingredient safety scores
    2. Suitable Diet Types: Determine if this product is suitable for vegans, vegetarians, etc.
    3. Allergy Warnings: Flag any potential allergens present
    4. Usage Recommendations: Provide safe consumption limits or usage guidance
    5. Health Insights: Summarize health benefits and concerns
    6. Ingredient Interactions: Note any ingredients that may interact when combined
    7. Key Takeaway: A single sentence summarizing if this product is recommended

    ## FORMAT YOUR RESPONSE AS JSON:
    {{
        "overall_safety_score": (number between 1-10),
        "suitable_diet_types": (array of strings like "Vegan", "Vegetarian", etc.),
        "allergy_warnings": (array of strings),
        "usage_recommendations": (string with specific guidance),
        "health_insights": {{
            "benefits": (array of strings),
            "concerns": (array of strings)
        }},
        "ingredient_interactions": (array of strings),
        "key_takeaway": (string)
    }}

    Only include factual information based on the provided data. If information is unavailable for any field, use appropriate default values.
    """

    logger.info("Sending product analysis prompt to LLM")

    try:
        # Initialize LLM inside the try so a configuration problem (e.g. a
        # missing API key) degrades to the fallback analysis like any other
        # LLM failure.
        llm = ChatGoogleGenerativeAI(
            google_api_key=os.getenv("LLM_API_KEY"),
            model=os.getenv("LLM_MODEL_NAME", "gemini-2.0-flash"),
            temperature=0.2  # Lower temperature for more factual responses
        )

        # Process with LLM. Use the async entry point: a blocking invoke()
        # inside this coroutine would stall the event loop for the whole call.
        message = HumanMessage(content=analysis_prompt)
        llm_response = await llm.ainvoke([message])
        analysis_text = llm_response.content

        # Find JSON in the response: first "{" through last "}".
        json_match = re.search(r'({.*})', analysis_text.replace('\n', ' '), re.DOTALL)

        if not json_match:
            logger.error("Could not find JSON in LLM response")
            return {
                "overall_safety_score": calculate_average_safety(ingredients_data),
                "error": "Failed to generate structured analysis",
                "ingredient_count": len(ingredients_data)
            }

        try:
            analysis = json.loads(json_match.group(0))
        except json.JSONDecodeError as e:
            logger.error(f"JSON parsing error: {e}")
            # Return a simplified analysis on error
            return {
                "overall_safety_score": calculate_average_safety(ingredients_data),
                "error": "Failed to parse complete analysis",
                "ingredient_count": len(ingredients_data),
                "key_takeaway": "Analysis error occurred, please check individual ingredients"
            }

        logger.info("Successfully parsed product analysis")
        return analysis

    except Exception as e:
        logger.error(f"Error in product analysis: {e}")
        # Fallback analysis based on simple calculations
        return generate_fallback_analysis(ingredients_data)
136
+
137
+
138
def calculate_average_safety(ingredients_data: List[IngredientAnalysisResult]) -> float:
    """Return the mean safety rating of the ingredients, rounded to one decimal.

    Ingredients whose ``safety_rating`` is ``None`` are skipped. When no rated
    ingredient remains, the neutral midpoint ``5.0`` is returned.
    """
    total = 0
    rated = 0
    for item in ingredients_data:
        rating = item.safety_rating
        if rating is None:
            continue
        total += rating
        rated += 1

    if rated == 0:
        return 5.0  # Default middle value
    return round(total / rated, 1)
144
+
145
+
146
def generate_fallback_analysis(ingredients_data: List[IngredientAnalysisResult]) -> Dict[str, Any]:
    """Generate a basic analysis when LLM processing fails.

    Args:
        ingredients_data: Per-ingredient results; ``allergic_info``,
            ``diet_type`` and ``safety_rating`` are read where present.

    Returns:
        A dict with the same top-level keys the LLM is asked to produce,
        filled from simple aggregation: the average safety score, the
        de-duplicated allergens, and diet labels derived from the ingredients
        whose diet type is known.
    """
    # Extract known allergens
    allergens = []
    for ingredient in ingredients_data:
        allergens.extend(getattr(ingredient, 'allergic_info', None) or [])

    # Determine diet type based on ingredients that declare one.
    known_diets = [
        diet
        for diet in (getattr(i, 'diet_type', None) for i in ingredients_data)
        if diet
    ]

    diet_types = []
    # all() is True for an empty iterable, so without this guard a product
    # with no diet information at all would be labelled Vegan and Vegetarian.
    # NOTE(review): comparison is case-sensitive, as before — confirm the
    # casing used for diet_type values.
    if known_diets:
        if all(diet == 'vegan' for diet in known_diets):
            diet_types.append("Vegan")
        if all(diet in ('vegan', 'vegetarian') for diet in known_diets):
            diet_types.append("Vegetarian")

    # Calculate safety score
    safety_score = calculate_average_safety(ingredients_data)

    return {
        "overall_safety_score": safety_score,
        "suitable_diet_types": diet_types,
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # response is deterministic (a set would not be).
        "allergy_warnings": list(dict.fromkeys(allergens)),
        "usage_recommendations": "Please refer to product packaging for usage guidelines",
        "health_insights": {
            "benefits": [],
            "concerns": ["Analysis system encountered an error, please check individual ingredients"]
        },
        # Present so the fallback carries the same keys as a full analysis.
        "ingredient_interactions": [],
        "key_takeaway": f"Product has {len(ingredients_data)} ingredients with average safety score of {safety_score}/10"
    }