HuggingFace-SK committed
Commit 7194e59 · Parent: 3e9e64f

try llms on server

Files changed (2):
  1. app.py +39 -19
  2. requirements.txt +5 -1
app.py CHANGED
@@ -6,16 +6,36 @@ import cv2
 from flask import Flask, jsonify, render_template, request, send_file
 import base64
 import numpy as np
-import easyocr
+import pytesseract
 from huggingface_hub import InferenceClient
+from transformers import AutoModelForCausalLM, AutoTokenizer
 
-app = Flask(__name__)
+model_name = "microsoft/Phi-3-mini-4k-instruct"
+
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    torch_dtype="auto",
+    device_map="auto"
+)
+tokenizer = AutoTokenizer.from_pretrained(model_name)
 
-reader = easyocr.Reader(['en'])
 
-HF_API_KEY=os.getenv("HF_API_KEY")
+pipe = pipeline(
+    "text-generation",
+    model=model,
+    tokenizer=tokenizer,
+)
+
+generation_args = {
+    "max_new_tokens": 500,
+    "return_full_text": False,
+    "temperature": 0.0,
+    "do_sample": False,
+}
+
+app = Flask(__name__)
+
 
-client = InferenceClient(api_key=HF_API_KEY)
 
 
 
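Note: the new startup code calls pipeline(...), but the import added above only brings in AutoModelForCausalLM and AutoTokenizer, so the module will fail at import time with NameError: name 'pipeline' is not defined. A one-line fix, assuming transformers is the intended source (it is where this helper lives):

    from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline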
 
@@ -28,6 +48,7 @@ def index():
 
 @app.route('/analyse', methods=['POST'])
 def analyse():
+    print("request", request.json.get('user_item_info'), "- done")
     frame = request.json.get('key')
     user_item_info = request.json.get('user_item_info')
     cv_frame = data_uri_to_image(frame)
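Note: request.json assumes the client sends Content-Type: application/json and errors out on anything else. If that guarantee is shaky, a more defensive sketch (not part of this commit) would be:

    data = request.get_json(silent=True) or {}   # None -> {} when the body is not JSON
    frame = data.get('key')
    user_item_info = data.get('user_item_info')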
 
@@ -35,16 +56,17 @@ def analyse():
     blur = cv2.bilateralFilter(gray_image,4,20,20)
     clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)) # histograme of distributed pixel values
     equalized = clahe.apply(blur)
-
-    # Optional: Apply thresholding or other preprocessing techniques
-    _, thresh_image = cv2.threshold(equalized, 150, 255, cv2.THRESH_BINARY)
+    print("preprocess - done")
+    # Optional: Apply thresholding or other preprocessing technique
 
     # Use pytesseract to extract text
-    results = reader.readtext(equalized)
+    '''results = reader.readtext(equalized)
     results_array =[]
     for (bbox, text, prob) in results:
-        results_array.append(f"[{text} (Confidence: {prob})]")
-
+        results_array.append(f"[{text} (Confidence: {prob})]")'''
+    custom_config = r'--psm 11'
+    text = pytesseract.image_to_string(equalized, config=custom_config)
+    print("OCR", text, " - done")
     # Iterate through the results and format them
     #cv2.imshow("threash", thresh_image)
     #cv2.imshow("clahe", equalized)
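Note: --psm 11 selects Tesseract's "sparse text" page-segmentation mode, a reasonable default for scattered label text. If the labels turn out to be mostly uniform blocks, a variant worth trying (a sketch to tune on real frames, not something this commit tested) is:

    custom_config = r'--oem 3 --psm 6'   # default OCR engine, assume one uniform text block
    text = pytesseract.image_to_string(equalized, config=custom_config)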
 
@@ -54,15 +76,13 @@ def analyse():
     item=user_item_info
     messages = [
         { "role": "system", "content": "You are a helpful nutritional information summarizer who is being used in a live application. You will be given the output of an OCR stage which has detected the text on a particular packaged food's label. You are tasked to give a comprehensive summary of the nutritional value, and ingredients if present. Comment on the health of the product.\nIf there are words you do not recognize, make educated guesses. At no point shall you refer to the inputted text or make users aware of any mistakes in character recognition. DO NOT REFER TO INPUT TEXT OR OCR " },
-        { "role": "user", "content": f"{item}: {results_array}" }
+        { "role": "user", "content": f"{item}: {text}" }
     ]
 
-    completion = client.chat.completions.create(
-        model="Qwen/Qwen2.5-Coder-32B-Instruct",
-        messages=messages,
-        max_tokens=500
-    )
-    return str(completion.choices[0].message.content)
+    output = pipe(messages, **generation_args)
+
+    print("LLM", output[0]['generated_text'], " - done")
+    return output[0]['generated_text']
 
 
 def data_uri_to_image(data_uri):
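Note: since generation_args sets do_sample=False, decoding is greedy and the "temperature": 0.0 entry is ignored (recent transformers versions emit a warning about it), so it can simply be dropped:

    generation_args = {
        "max_new_tokens": 500,
        "return_full_text": False,   # output[0]['generated_text'] holds only the new reply
        "do_sample": False,
    }

With return_full_text=False the route returns just the assistant's generated text rather than the echoed prompt.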
 
@@ -85,4 +105,4 @@ def image_to_data_uri(image):
 
 
 if __name__ == "__main__":
-    app.run(host="0.0.0.0", port=7860)
+    app.run(host="0.0.0.0", port=7860, ssl_context=("cert.pem", "key.pem"))
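Note: ssl_context=("cert.pem", "key.pem") expects both files to exist in the working directory. For quick local testing, Werkzeug also accepts an ad-hoc self-signed certificate (requires the cryptography package; browsers will still warn):

    app.run(host="0.0.0.0", port=7860, ssl_context="adhoc")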
requirements.txt CHANGED
@@ -2,4 +2,8 @@ opencv-python-headless == 4.10.0.84
 numpy == 1.23.3
 Flask == 3.0.3
 Flask-SocketIO == 5.4.1
-easyocr == 1.7.2
+flash_attn==2.5.8
+torch==2.3.1
+accelerate==0.31.0
+transformers==4.41.2
+pytesseract
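Note: pytesseract is only a Python wrapper; the tesseract binary itself must be installed on the server (on Hugging Face Spaces, typically by listing tesseract-ocr in a packages.txt file). A quick startup check, as a sketch:

    import pytesseract
    pytesseract.get_tesseract_version()   # raises TesseractNotFoundError if the binary is absent

Also, flash_attn==2.5.8 compiles against CUDA and will typically fail to install on a CPU-only Space; Phi-3-mini can still load without it as long as no flash-attention implementation is requested.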