import os
import base64

import cv2
import numpy as np
import pytesseract
from flask import Flask, render_template, request
from groq import Groq

app = Flask(__name__)

# The Groq API key is read from the HF_API_KEY environment variable
# (set as a secret in the hosting environment).
HF_API_KEY = os.getenv("HF_API_KEY")
client = Groq(api_key=HF_API_KEY)

@app.route("/")
def index():
    return render_template("index.html")


@app.route('/analyse', methods=['POST'])
def analyse():
    # Pull the base64-encoded frame and the user's description of the item
    # out of the JSON request body.
    frame = request.json.get('key')
    user_item_info = request.json.get('user_item_info')
    print("request", user_item_info, "- done")

    # Preprocess for OCR: grayscale -> edge-preserving blur -> CLAHE
    # (contrast-limited adaptive histogram equalization).
    cv_frame = data_uri_to_image(frame)
    gray_image = cv2.cvtColor(cv_frame, cv2.COLOR_BGR2GRAY)
    blur = cv2.bilateralFilter(gray_image, 4, 20, 20)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    equalized = clahe.apply(blur)
    print("preprocess - done")
    # Optional: apply thresholding or another preprocessing technique here.

    # Use pytesseract to extract text; --psm 11 treats the image as sparse text.
    # A previous reader-based OCR path, kept for reference:
    # results = reader.readtext(equalized)
    # results_array = []
    # for (bbox, text, prob) in results:
    #     results_array.append(f"[{text} (Confidence: {prob})]")
    custom_config = r'--psm 11'
    text = pytesseract.image_to_string(equalized, config=custom_config)
    print("OCR", text, " - done")

    # Debug previews of the preprocessing stages:
    # cv2.imshow("clahe", equalized)
    # cv2.imshow("blur", blur)
    # cv2.imshow("orig", gray_image)
    # cv2.waitKey(0)

    # Ask the LLM to summarize the label text extracted by the OCR stage.
    item = user_item_info
    messages = [
        {
            "role": "system",
            "content": (
                "You are a helpful nutritional information summarizer who is being used in a "
                "live application. You will be given the output of an OCR stage which has "
                "detected the text on a particular packaged food's label. You are tasked to "
                "give a comprehensive summary of the nutritional value, and ingredients if "
                "present. Comment on the health of the product.\n"
                "If there are words you do not recognize, make educated guesses. At no point "
                "shall you refer to the inputted text or make users aware of any mistakes in "
                "character recognition. DO NOT REFER TO INPUT TEXT OR OCR"
            ),
        },
        {"role": "user", "content": f"{item}: {text}"},
    ]

    completion = client.chat.completions.create(
        model="llama3-8b-8192",
        messages=messages,
        max_tokens=500,
    )
    print("LLM", str(completion.choices[0].message.content), " - done")
    return str(completion.choices[0].message.content)
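
# For reference, the /analyse route above expects a JSON body shaped roughly like
# this (the literal values are illustrative only, not taken from the app):
#
#   {
#       "key": "data:image/jpeg;base64,<encoded frame>",
#       "user_item_info": "granola bar"
#   }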


def data_uri_to_image(data_uri):
    # Strip the "data:image/...;base64," header, decode the base64 payload,
    # and decode the bytes into an OpenCV BGR image.
    header, encoded = data_uri.split(',', 1)
    decoded_data = base64.b64decode(encoded)
    nparr = np.frombuffer(decoded_data, np.uint8)
    image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
    return image

def image_to_data_uri(image):
    # Encode the image as a JPEG
    _, buffer = cv2.imencode('.jpg', image)
    # Convert the buffer to bytes
    image_bytes = buffer.tobytes()
    # Encode the bytes to Base64
    base64_encoded = base64.b64encode(image_bytes).decode('utf-8')
    # Create the Data URI
    data_uri = f"data:image/jpeg;base64,{base64_encoded}"
    return data_uri


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)
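
# --- Usage sketch (not part of the app) --------------------------------------
# A minimal way to exercise the /analyse endpoint from a separate script, assuming
# the server is running locally on port 7860 and "label.jpg" is a photo of a food
# label (the URL, filename, and item name below are illustrative assumptions):
#
#   import base64
#   import requests
#
#   with open("label.jpg", "rb") as f:
#       data_uri = "data:image/jpeg;base64," + base64.b64encode(f.read()).decode("utf-8")
#
#   resp = requests.post(
#       "http://localhost:7860/analyse",
#       json={"key": data_uri, "user_item_info": "granola bar"},
#   )
#   print(resp.text)  # the LLM's nutritional summary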