# NutriFind / app.py
# (GitHub scrape residue converted to comments — branch "HuggingFace-SK",
#  commit "add groq api", 468edda. These lines were not valid Python.)
import os
import requests
import json
from io import BytesIO
import cv2
from flask import Flask, jsonify, render_template, request, send_file
import base64
import numpy as np
import pytesseract
from groq import Groq
app = Flask(__name__)

# API key for the Groq chat-completions client.
# NOTE(review): the env var is named HF_API_KEY but it is handed to Groq —
# presumably a leftover from an earlier HuggingFace integration; confirm
# before renaming the variable (the env-var name is part of the deployment
# contract, so it is kept as-is here).
HF_API_KEY = os.getenv("HF_API_KEY")
if not HF_API_KEY:
    # Fail loudly at startup rather than with an opaque auth error later.
    # SECURITY: never print the key itself — it would leak into logs.
    print("WARNING: HF_API_KEY environment variable is not set")

client = Groq(api_key=HF_API_KEY)
@app.route("/")
def index():
    """Serve the single-page UI (templates/index.html)."""
    page = render_template("index.html")
    return page
@app.route('/analyse', methods=['POST'])
def analyse():
    """Analyse a captured frame of a packaged food's label.

    Expects a JSON body with:
        key            -- data-URI-encoded image of the label
        user_item_info -- free-text item description supplied by the user

    Returns the LLM-generated nutritional summary as plain text, or a
    JSON 400 error when the image is missing or undecodable.
    """
    payload = request.get_json(silent=True) or {}
    frame = payload.get('key')
    user_item_info = payload.get('user_item_info')
    if not frame:
        # Previously a missing frame crashed with an opaque 500 inside
        # data_uri_to_image; reject it explicitly instead.
        return jsonify({"error": "missing 'key' (image data URI)"}), 400

    cv_frame = data_uri_to_image(frame)
    equalized = _preprocess_for_ocr(cv_frame)

    # --psm 11: "sparse text" page-segmentation mode — find as much text
    # as possible in no particular order, which suits scattered label
    # layouts better than the default block-of-text assumption.
    custom_config = r'--psm 11'
    text = pytesseract.image_to_string(equalized, config=custom_config)

    return _summarize(user_item_info, text)


def _preprocess_for_ocr(cv_frame):
    """Grayscale + edge-preserving denoise + CLAHE contrast boost for OCR."""
    gray_image = cv2.cvtColor(cv_frame, cv2.COLOR_BGR2GRAY)
    # Bilateral filter smooths noise while keeping label-text edges sharp.
    blur = cv2.bilateralFilter(gray_image, 4, 20, 20)
    # Contrast-limited adaptive histogram equalization.
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    return clahe.apply(blur)


def _summarize(item, text):
    """Ask the Groq LLM to summarize the OCR'd label text for *item*."""
    messages = [
        { "role": "system", "content": "You are a helpful nutritional information summarizer who is being used in a live application. You will be given the output of an OCR stage which has detected the text on a particular packaged food's label. You are tasked to give a comprehensive summary of the nutritional value, and ingredients if present. Comment on the health of the product.\nIf there are words you do not recognize, make educated guesses. At no point shall you refer to the inputted text or make users aware of any mistakes in character recognition. DO NOT REFER TO INPUT TEXT OR OCR " },
        { "role": "user", "content": f"{item}: {text}" },
    ]
    completion = client.chat.completions.create(
        model="llama3-8b-8192",
        messages=messages,
        max_tokens=500,
    )
    return str(completion.choices[0].message.content)
def data_uri_to_image(data_uri):
    """Decode a base64 data URI (e.g. "data:image/jpeg;base64,...") into a
    BGR OpenCV image.

    Also accepts a bare base64 payload with no "data:...," header.

    Raises:
        ValueError: if the base64 payload is malformed or the decoded
            bytes are not a decodable image.
    """
    # rpartition tolerates a missing header: everything after the last
    # comma (or the whole string, if there is none) is the payload.
    _, _, encoded = data_uri.rpartition(',')
    try:
        decoded_data = base64.b64decode(encoded)
    except ValueError as err:  # binascii.Error is a ValueError subclass
        raise ValueError("invalid base64 payload in data URI") from err
    nparr = np.frombuffer(decoded_data, np.uint8)
    image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
    if image is None:
        # imdecode signals failure by returning None, not by raising;
        # surface it now instead of crashing later in cvtColor.
        raise ValueError("data URI payload is not a decodable image")
    return image
def image_to_data_uri(image):
    """Encode a BGR OpenCV image as a JPEG data URI string.

    Raises:
        ValueError: if JPEG encoding fails.
    """
    # imencode reports failure via its boolean return value, not an
    # exception — the original ignored it and could emit a garbage URI.
    ok, buffer = cv2.imencode('.jpg', image)
    if not ok:
        raise ValueError("failed to encode image as JPEG")
    image_bytes = buffer.tobytes()
    base64_encoded = base64.b64encode(image_bytes).decode('utf-8')
    return f"data:image/jpeg;base64,{base64_encoded}"
if __name__ == "__main__":
    # 0.0.0.0 exposes the server to the container network; port 7860 is
    # presumably the HuggingFace Spaces convention — TODO confirm.
    app.run(host="0.0.0.0", port=7860)