File size: 3,070 Bytes
030432c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import logging
import re
from concurrent.futures import ThreadPoolExecutor

from flask import Blueprint, request, jsonify

from app.utils.helper import correct_spelling, load_data
from app.utils.ner import extract_ingredients
from app.utils.ocr import extract_text_from_image
from app.utils.prediction import predict_with_description

# Blueprint holding the /analyze route defined below.
analyze_blueprint = Blueprint('analyze', __name__)
# NOTE(review): basicConfig at import time configures the root logger for the
# whole process — a module logger (logging.getLogger(__name__)) would be less
# invasive; confirm nothing else relies on this call.
logging.basicConfig(level=logging.INFO)

# Load data + model only once
# (df_brand and product_embeddings are not used in this view — presumably
# consumed elsewhere; verify before removing. TODO confirm.)
df_cosing, df_brand, product_embeddings = load_data()

@analyze_blueprint.route("/analyze", methods=["POST"])
def analyze_ingredients():
    """Analyze cosmetic ingredients supplied via image, JSON, or form data.

    Accepts any combination of:
      * a file upload named ``ingredients`` (OCR + NER extraction),
      * a JSON body with an ``ingredients`` key (string or list of strings),
      * a form field ``ingredients`` (delimiter-separated string).

    Returns:
        200 with ``{"Ingredient Analysis": [...]}`` on success,
        400 when no ingredients could be recognized,
        500 on any unexpected error.
    """
    try:
        logging.info("Start analyzing ingredients")
        ingredients_input = []

        # OCR from image (optional upload field).
        if 'ingredients' in request.files:
            logging.info("Extracting ingredients from uploaded image using OCR")
            text = extract_text_from_image(request.files['ingredients'])
            logging.info("OCR text result: %s", text)
            if text.strip():
                extracted = extract_ingredients(text)
                logging.info("Extracted ingredients from OCR: %s", extracted)
                ingredients_input.extend(extracted)

        # Text input from JSON body or form field.
        data = request.get_json(silent=True) or {}
        text_input = data.get('ingredients') or request.form.get('ingredients')
        logging.info("Text input from JSON/form: %s", text_input)

        if isinstance(text_input, str):
            # Split on commas/semicolons; fall back to NER extraction when
            # the string contains no delimiter-separated content.
            manual_split = [i.strip() for i in re.split(r',|;', text_input) if i.strip()]
            parsed = manual_split or extract_ingredients(text_input)
            logging.info("Parsed ingredients from string input: %s", parsed)
            ingredients_input.extend(parsed)
        elif isinstance(text_input, list):
            # Skip non-string entries so a malformed JSON list (nulls,
            # numbers) is ignored instead of raising AttributeError -> 500.
            cleaned_list = [
                i.strip().lower()
                for i in text_input
                if isinstance(i, str) and i.strip()
            ]
            logging.info("Parsed ingredients from list input: %s", cleaned_list)
            ingredients_input.extend(cleaned_list)

        if not ingredients_input:
            logging.warning("No ingredients recognized after processing input.")
            return jsonify({"error": "No ingredients recognized."}), 400

        # Normalize every source the same way (the list branch already
        # lowercased; OCR/string input previously did not, letting "Water"
        # and "water" slip past dedup), then deduplicate while preserving
        # first-seen order — list(set(...)) made result order nondeterministic.
        normalized = [ing.strip().lower() for ing in ingredients_input if ing.strip()]
        ingredients_input = list(dict.fromkeys(normalized))
        logging.info("Unique ingredients before spell check: %s", ingredients_input)
        corrected = [correct_spelling(ing, df_cosing) for ing in ingredients_input]
        logging.info("Corrected ingredients: %s", corrected)

        # Predict each ingredient's effect concurrently; threads are fine
        # here since the work is presumably model/I-O bound — TODO confirm.
        logging.info("Predicting individual ingredient effects")
        with ThreadPoolExecutor() as executor:
            results = list(executor.map(
                lambda ing: predict_with_description(ing, df_cosing), corrected))
        logging.info("Prediction results: %s", results)

        return jsonify({
            "Ingredient Analysis": results,
        })

    except Exception as e:
        # Top-level request boundary: log the traceback, return generic 500.
        logging.exception("Error in analyze_ingredients: %s", e)
        return jsonify({"error": str(e)}), 500