Spaces:
Sleeping
Sleeping
| """ | |
| app/services/llm.py | |
| LLM abstraction layer + proprietary food database population. | |
| Doc says: "Your 'AI analysis' is a prompt. Prompt engineering is not a | |
| competitive advantage." This module starts building the real moat: | |
| every verified scan result goes into food_products table. | |
| """ | |
| import os | |
| import re | |
| import json | |
| import logging | |
| import asyncio | |
| from app.models.db import get_ai_cache, set_ai_cache, db_conn | |
# Module-level logger named after this module's import path.
logger = logging.getLogger(__name__)

# Groq credentials come from the environment. With no key the client stays
# None and call_llm() raises RuntimeError instead of attempting a request.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
_groq_client = None
if GROQ_API_KEY:
    # Imported only when a key is configured.
    from groq import Groq

    _groq_client = Groq(api_key=GROQ_API_KEY)

# Disclaimer that analyse_label() attaches to its results.
# NOTE(review): the leading symbol was mis-encoded in the source; restored as
# the medical-staff emoji — confirm against the intended original.
MEDICAL_DISCLAIMER = (
    "⚕️ For informational purposes only — not medical advice. "
    "Consult a qualified nutritionist or physician before making dietary decisions."
)

# Language code -> language name interpolated into the analysis prompt.
LANGUAGE_MAP = {
    "en": "English", "zh": "Simplified Chinese",
    "es": "Spanish", "ar": "Arabic",
    "fr": "French", "hi": "Hindi (हिन्दी)",
    "pt": "Portuguese", "de": "German",
}
def call_llm(prompt: str, max_tokens: int = 2500) -> str:
    """Provider-agnostic LLM call. Swap Groq → Anthropic → Ollama here.

    Tries each Groq model in order, requesting a JSON-object response, and
    returns the first non-empty completion.

    Args:
        prompt: Full prompt, sent as a single user message.
        max_tokens: Upper bound on generated tokens.

    Returns:
        The raw message content of the first successful completion.

    Raises:
        RuntimeError: If no Groq client is configured, or if every model
            fails. In the latter case the error is chained to the last
            underlying exception (when there was one) so the cause is not lost.
    """
    if not _groq_client:
        raise RuntimeError("GROQ_API_KEY not set")

    last_error: Exception | None = None
    for model in ("llama-3.3-70b-versatile", "llama-3.1-8b-instant"):
        try:
            comp = _groq_client.chat.completions.create(
                model=model,
                messages=[{"role": "user", "content": prompt}],
                temperature=0.1, max_tokens=max_tokens,
                response_format={"type": "json_object"},
            )
            content = comp.choices[0].message.content
        except Exception as exc:
            # Provider boundary: any failure means "try the next model".
            logger.warning("LLM %s failed: %s", model, exc)
            last_error = exc
            continue
        if content:
            return content
        # A None/empty completion would break the `-> str` contract and the
        # caller's JSON parsing, so treat it like any other model failure.
        logger.warning("LLM %s failed: %s", model, "empty response")
    raise RuntimeError("All LLM models failed") from last_error
def build_analysis_prompt(extracted_text: str, persona: str, age_group: str,
                          product_category: str, language: str,
                          web_context: str, label_confidence: str,
                          blur_info: dict) -> str:
    """Build the instruction prompt for a nutrition-label analysis.

    Args:
        extracted_text: Label text to analyse; embedded verbatim in the prompt.
        persona: Persona string embedded in the prompt header.
        age_group: Age group string embedded in the prompt header.
        product_category: Category string embedded in the prompt header.
        language: Key into LANGUAGE_MAP; unknown codes fall back to English.
        web_context: Extra context text; embedded verbatim in the prompt.
        label_confidence: When equal to "low", a partial-label caution is added.
        blur_info: Blur metadata. Keys read here: "detected", "deblurred",
            "severity". May be None or lack "severity".

    Returns:
        The complete prompt, including the JSON response template and the
        scoring rubric.
    """
    lang_name = LANGUAGE_MAP.get(language, "English")
    conf_note = (
        "⚠️ Label text may be partial — only list nutrients you can read confidently."
        if label_confidence == "low" else ""
    )

    # Tolerate missing blur metadata instead of raising on None / absent keys.
    blur_info = blur_info or {}
    blur_ctx = ""
    if blur_info.get("detected"):
        verb = (
            "enhanced via Wiener deconvolution"
            if blur_info.get("deblurred") else "blurry, used original"
        )
        severity = blur_info.get("severity")
        # With a severity the wording is unchanged ("<severity>ly blurry").
        degree = f"{severity}ly blurry" if severity else "blurry"
        blur_ctx = f"IMAGE: {degree} ({verb}). Only report confident values."

    return f"""[INST]
You are an expert nutritional scientist and food safety auditor.
CRITICAL: Respond ENTIRELY in {lang_name}. Every text field MUST be in {lang_name}.
Persona: {persona} | Age: {age_group} | Category: {product_category}
{conf_note}
{blur_ctx}
Label Text: "{extracted_text}"
Web Context: "{web_context}"
Return ONLY valid JSON — no markdown, no preamble:
{{
"product_name" : "Short name from label",
"product_category" : "Snack|Dairy|Beverage|Cereal|Supplement|etc.",
"score" : <INTEGER 1-10 per SCORING RUBRIC — never default to 6 or 7>,
"verdict" : "Two-word verdict in {lang_name}",
"chart_data" : [<Safe%>, <Moderate%>, <Risky%>],
"summary" : "2-sentence professional summary in {lang_name}.",
"eli5_explanation" : "Child-friendly explanation with emojis in {lang_name}.",
"molecular_insight" : "1-2 sentences on biochemical body impact in {lang_name}.",
"paragraph_benefits": "Full paragraph on genuine benefits in {lang_name}.",
"paragraph_uniqueness": "Unique characteristics OR 2 better alternatives in {lang_name}.",
"is_unique" : true,
"nutrient_breakdown": [
{{"name":"Protein","value":<ACTUAL g from label>,"unit":"g","rating":"good","impact":"brief note in {lang_name}"}},
{{"name":"Sugar","value":<ACTUAL g>,"unit":"g","rating":"moderate","impact":"brief note"}},
{{"name":"Fat","value":<ACTUAL g>,"unit":"g","rating":"good","impact":"brief note"}},
{{"name":"Sodium","value":<ACTUAL mg>,"unit":"mg","rating":"caution","impact":"brief note"}},
{{"name":"Fiber","value":<ACTUAL g>,"unit":"g","rating":"good","impact":"brief note"}}
],
"pros" : ["Benefit 1 in {lang_name}", "Benefit 2", "Benefit 3"],
"cons" : ["Risk 1 in {lang_name}", "Risk 2"],
"age_warnings" : [
{{"group":"Children","emoji":"👶","status":"warning","message":"in {lang_name}"}},
{{"group":"Adults","emoji":"🧑","status":"good","message":"in {lang_name}"}},
{{"group":"Seniors","emoji":"👴","status":"caution","message":"in {lang_name}"}},
{{"group":"Pregnant","emoji":"🤰","status":"caution","message":"in {lang_name}"}}
],
"better_alternative": "A specific healthier alternative in {lang_name}.",
"is_low_confidence" : false
}}
SCORING RUBRIC — MANDATORY, never use 6 or 7 as defaults:
9-10: Whole food, no added sugar, low sodium, high fibre/protein
7-8 : Mildly processed, sugar <5g/100g, reasonable sodium
5-6 : Processed, sugar 5-15g/100g OR sodium 400-700mg/100g
3-4 : High sugar >15g/100g OR sodium >700mg/100g OR poor profile
1-2 : Ultra-processed, very high sugar/sodium/sat-fat
RULES: chart_data sums to 100 | rating: good|moderate|caution|bad | status: good|caution|warning
[/INST]"""
def sanitise_result(result: dict) -> dict:
    """Fix known LLM output issues: chart rounding, unit strings, defaults.

    The dict is modified in place and also returned.

    * ``chart_data`` ends up as three non-negative numbers summing to 100.
      Anything malformed (wrong type/length, negative or non-finite values,
      zero total) is replaced by ``[70, 20, 10]``.
    * ``nutrient_breakdown`` ends up as a list of dicts; a missing/null/non-list
      value becomes ``[]`` and non-dict entries are dropped. Values such as
      ``"34g"`` or ``"1,5 g"`` become floats; values with no digits are kept.
    * Missing or null top-level fields receive defaults.

    Args:
        result: Parsed JSON object returned by the LLM.

    Returns:
        The same dict, sanitised.
    """
    # chart_data — must sum to exactly 100
    chart = result.get("chart_data")
    chart_ok = (
        isinstance(chart, list)
        and len(chart) == 3
        and all(
            isinstance(x, (int, float))
            and not isinstance(x, bool)
            # Chained comparison also rejects NaN and infinity.
            and 0 <= x < float("inf")
            for x in chart
        )
    )
    total = sum(chart) if chart_ok else 0
    if total <= 0:
        # Malformed or all-zero chart: nothing sensible to rescale.
        result["chart_data"] = [70, 20, 10]
    elif total != 100:
        scaled = [round(v * 100 / total) for v in chart]
        # Put the rounding remainder on the largest slice.
        scaled[scaled.index(max(scaled))] += 100 - sum(scaled)
        result["chart_data"] = scaled

    # Nutrient value "34g" → 34.0
    raw_nutrients = result.get("nutrient_breakdown")
    nutrients = (
        [entry for entry in raw_nutrients if isinstance(entry, dict)]
        if isinstance(raw_nutrients, list) else []
    )
    for entry in nutrients:
        value = entry.get("value", "")
        if isinstance(value, (int, float)) and not isinstance(value, bool):
            # Already numeric: convert directly so values like 1e-05 are not
            # mangled by the string/regex round-trip.
            entry["value"] = float(value)
            continue
        match = re.search(r"\d+\.?\d*", str(value).replace(",", "."))
        if match:
            entry["value"] = float(match.group())
    result["nutrient_breakdown"] = nutrients

    # Fill fields the model omitted or returned as null. The dict literal is
    # rebuilt on every call, so the list defaults are never shared.
    defaults = {
        "score": 5,
        "verdict": "Analyzed",
        "product_name": "Unknown Product",
        "pros": [],
        "cons": [],
        "age_warnings": [],
        "is_low_confidence": False,
    }
    for key, default in defaults.items():
        if result.get(key) is None:
            result[key] = default
    return result
async def analyse_label(
    extracted_text: str,
    persona: str,
    age_group: str,
    product_category: str,
    language: str,
    web_context: str,
    blur_info: dict,
    label_confidence: str,
) -> dict:
    """Full analysis pipeline: cache → LLM → sanitise → return.

    Args:
        extracted_text: Label text to analyse.
        persona: Persona string forwarded to the prompt; part of the cache key.
        age_group: Age group forwarded to the prompt; part of the cache key.
        product_category: Category forwarded to the prompt; part of the cache key.
        language: Language code forwarded to the prompt; part of the cache key.
        web_context: Extra context forwarded to the prompt (not cached on).
        blur_info: Blur metadata forwarded to the prompt (not cached on).
        label_confidence: Confidence flag forwarded to the prompt (not cached on).

    Returns:
        The sanitised analysis dict with a "disclaimer" entry, either from the
        cache or freshly generated.

    Raises:
        RuntimeError: Propagated from call_llm() when no model succeeds.
        ValueError: If the LLM output is not valid JSON (json.JSONDecodeError
            is a ValueError) or is not a JSON object.
    """
    import hashlib  # function-scope import: only needed for the cache key

    # Key on a digest of the FULL label text plus category. Keying on just the
    # first 80 characters made different products that share a common label
    # header collide and return each other's cached analysis.
    fingerprint = hashlib.sha256(
        f"{product_category}\x1f{extracted_text}".encode("utf-8")
    ).hexdigest()
    # Version bumped from v3 because the key layout changed.
    cache_key = f"v4:{language}:{persona}:{age_group}:{fingerprint}"

    cached = get_ai_cache(cache_key)
    if cached:
        return cached

    prompt = build_analysis_prompt(
        extracted_text, persona, age_group, product_category,
        language, web_context, label_confidence, blur_info
    )
    # call_llm is blocking, so run it off the event loop.
    raw = await asyncio.to_thread(call_llm, prompt, 2500)

    parsed = json.loads(raw)
    if not isinstance(parsed, dict):
        raise ValueError("LLM response is not a JSON object")

    result = sanitise_result(parsed)
    result["disclaimer"] = MEDICAL_DISCLAIMER

    # Cache (without ephemeral fields)
    cacheable = {k: v for k, v in result.items()
                 if k not in ("blur_info", "scan_meta", "allergen_warning")}
    set_ai_cache(cache_key, cacheable)
    return result
# ── Phase 2: Proprietary food database population ─────────────────────
def upsert_food_product(
    name: str,
    nutrients: list,
    score: int,
    ingredients_raw: str = "",
    barcode: str | None = None,
    brand: str = "",
    category: str = "",
    source: str = "llm_scan",
) -> int:
    """Insert a product into food_products, or bump its scan count.

    An existing row is looked up by barcode when one is given, otherwise by
    name + brand. If found, only ``scan_count`` and ``updated_at`` are updated;
    otherwise a new row is inserted with ``scan_count`` 1.

    Args:
        name: Product name; stored and matched with surrounding whitespace removed.
        nutrients: List of dicts with "name" and "value" keys (the shape of
            ``nutrient_breakdown``). Non-dict entries and non-numeric values
            are ignored.
        score: Stored in the ``eatlytic_score`` column.
        ingredients_raw: Raw ingredients text.
        barcode: Product barcode, if known.
        brand: Brand name; stored and matched with surrounding whitespace removed.
        category: Product category.
        source: Origin tag for the row.

    Returns:
        The product id (existing row id, or the new row's ``lastrowid``).
    """
    def _get(key: str, exclude: tuple = ()) -> float:
        """Return the first numeric value whose name contains *key*, else 0."""
        for entry in nutrients or []:
            if not isinstance(entry, dict):
                continue
            label = str(entry.get("name") or "").lower()
            if key not in label or any(term in label for term in exclude):
                continue
            value = entry.get("value", 0)
            if isinstance(value, (int, float)) and not isinstance(value, bool):
                return float(value)
            return 0.0
        return 0.0

    cal = _get("calorie") or _get("energy") or _get("kcal")
    prot = _get("protein")
    carb = _get("carb")  # substring match also covers "carbohydrate"
    # "fat" is a substring of "saturated fat" / "trans fat"; skip those so
    # total fat is not taken from a sub-category listed earlier.
    fat = _get("fat", exclude=("saturated", "trans"))
    sod = _get("sodium")
    fib = _get("fiber") or _get("fibre")
    # Prefer total sugar; fall back to any sugar entry (e.g. only "added sugar").
    sug = _get("sugar", exclude=("added",)) or _get("sugar")
    # "saturated" is a substring of "unsaturated" (mono-/poly-); skip those.
    sat = _get("saturated", exclude=("unsaturated",))

    # Normalise once so the lookup and the insert agree; previously brand was
    # stripped for the lookup but stored unstripped, which could create
    # duplicates.
    clean_name = name.strip()
    clean_brand = brand.strip()

    with db_conn() as conn:
        # Try to find existing by barcode (most reliable) or name+brand
        if barcode:
            existing = conn.execute(
                "SELECT id, scan_count FROM food_products WHERE barcode=?", (barcode,)
            ).fetchone()
        else:
            existing = conn.execute(
                "SELECT id, scan_count FROM food_products WHERE name=? AND brand=?",
                (clean_name, clean_brand)
            ).fetchone()

        if existing:
            # Increment scan_count — this is how we know which products are popular
            conn.execute(
                """UPDATE food_products SET scan_count=scan_count+1, updated_at=datetime('now')
                WHERE id=?""",
                (existing["id"],)
            )
            return existing["id"]

        cursor = conn.execute(
            """INSERT INTO food_products
            (name,brand,category,barcode,calories_100g,protein_100g,carbs_100g,
            fat_100g,sodium_100g,fiber_100g,sugar_100g,sat_fat_100g,
            eatlytic_score,ingredients_raw,source,scan_count)
            VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,1)""",
            (clean_name, clean_brand, category, barcode,
             cal, prot, carb, fat, sod, fib, sug, sat,
             score, ingredients_raw, source)
        )
        return cursor.lastrowid
def get_food_from_db(name: str = "", barcode: str = "") -> dict | None:
    """Look up a verified product in food_products.

    A barcode, when given, takes precedence and is matched exactly. Otherwise
    the name is matched as a literal substring and the row with the highest
    ``scan_count`` wins.

    Args:
        name: Product name (or fragment) to search for.
        barcode: Product barcode.

    Returns:
        The matching row as a dict, or None when nothing matches or neither
        argument is usable.
    """
    search = name.strip() if name else ""
    if not barcode and not search:
        return None

    with db_conn() as conn:
        if barcode:
            row = conn.execute(
                "SELECT * FROM food_products WHERE barcode=? AND verified=1", (barcode,)
            ).fetchone()
        else:
            # Escape LIKE wildcards so "%" / "_" in the name match literally
            # rather than widening the search.
            pattern = (
                search.replace("\\", "\\\\")
                .replace("%", "\\%")
                .replace("_", "\\_")
            )
            row = conn.execute(
                """SELECT * FROM food_products WHERE name LIKE ? ESCAPE '\\'
                AND verified=1 ORDER BY scan_count DESC LIMIT 1""",
                (f"%{pattern}%",)
            ).fetchone()
    return dict(row) if row else None