import os
import base64

import cv2
import numpy as np
import pytesseract
from flask import Flask, render_template, request
from groq import Groq

app = Flask(__name__)

# The Groq API key is read from the HF_API_KEY environment variable
# (set as a secret in the hosting environment).
HF_API_KEY = os.getenv("HF_API_KEY")
client = Groq(api_key=HF_API_KEY)

@app.route("/")
def index():
    return render_template("index.html")


@app.route('/analyse', methods=['POST'])
def analyse():
    # Pull the base64-encoded frame and the user's description of the item
    # out of the JSON request body.
    frame = request.json.get('key')
    user_item_info = request.json.get('user_item_info')
    print("request", user_item_info, "- done")

    # Preprocess for OCR: grayscale -> edge-preserving blur -> CLAHE
    # (contrast-limited adaptive histogram equalization).
    cv_frame = data_uri_to_image(frame)
    gray_image = cv2.cvtColor(cv_frame, cv2.COLOR_BGR2GRAY)
    blur = cv2.bilateralFilter(gray_image, 4, 20, 20)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    equalized = clahe.apply(blur)
    print("preprocess - done")
    # Optional: apply thresholding or another preprocessing technique here.

    # Use pytesseract to extract text; --psm 11 treats the image as sparse text.
    # A previous reader-based OCR path, kept for reference:
    # results = reader.readtext(equalized)
    # results_array = []
    # for (bbox, text, prob) in results:
    #     results_array.append(f"[{text} (Confidence: {prob})]")
    custom_config = r'--psm 11'
    text = pytesseract.image_to_string(equalized, config=custom_config)
    print("OCR", text, " - done")

    # Debug previews of the preprocessing stages:
    # cv2.imshow("clahe", equalized)
    # cv2.imshow("blur", blur)
    # cv2.imshow("orig", gray_image)
    # cv2.waitKey(0)

    # Ask the LLM to summarize the label text extracted by the OCR stage.
    item = user_item_info
    messages = [
        {
            "role": "system",
            "content": (
                "You are a helpful nutritional information summarizer who is being used in a "
                "live application. You will be given the output of an OCR stage which has "
                "detected the text on a particular packaged food's label. You are tasked to "
                "give a comprehensive summary of the nutritional value, and ingredients if "
                "present. Comment on the health of the product.\n"
                "If there are words you do not recognize, make educated guesses. At no point "
                "shall you refer to the inputted text or make users aware of any mistakes in "
                "character recognition. DO NOT REFER TO INPUT TEXT OR OCR"
            ),
        },
        {"role": "user", "content": f"{item}: {text}"},
    ]

    completion = client.chat.completions.create(
        model="llama3-8b-8192",
        messages=messages,
        max_tokens=500,
    )
    print("LLM", str(completion.choices[0].message.content), " - done")
    return str(completion.choices[0].message.content)
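
# For reference, the /analyse route above expects a JSON body shaped roughly like
# this (the literal values are illustrative only, not taken from the app):
#
#   {
#       "key": "data:image/jpeg;base64,<encoded frame>",
#       "user_item_info": "granola bar"
#   }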


def data_uri_to_image(data_uri):
    # Strip the "data:image/...;base64," header, decode the base64 payload,
    # and decode the bytes into an OpenCV BGR image.
    header, encoded = data_uri.split(',', 1)
    decoded_data = base64.b64decode(encoded)
    nparr = np.frombuffer(decoded_data, np.uint8)
    image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
    return image

def image_to_data_uri(image):
    # Encode the image as a JPEG
    _, buffer = cv2.imencode('.jpg', image)
    # Convert the buffer to bytes
    image_bytes = buffer.tobytes()
    # Encode the bytes to Base64
    base64_encoded = base64.b64encode(image_bytes).decode('utf-8')
    # Create the Data URI
    data_uri = f"data:image/jpeg;base64,{base64_encoded}"
    return data_uri


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)
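
# --- Usage sketch (not part of the app) --------------------------------------
# A minimal way to exercise the /analyse endpoint from a separate script, assuming
# the server is running locally on port 7860 and "label.jpg" is a photo of a food
# label (the URL, filename, and item name below are illustrative assumptions):
#
#   import base64
#   import requests
#
#   with open("label.jpg", "rb") as f:
#       data_uri = "data:image/jpeg;base64," + base64.b64encode(f.read()).decode("utf-8")
#
#   resp = requests.post(
#       "http://localhost:7860/analyse",
#       json={"key": data_uri, "user_item_info": "granola bar"},
#   )
#   print(resp.text)  # the LLM's nutritional summary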