HuggingFace-SK committed · Commit 7194e59 · 1 Parent(s): 3e9e64f

try llms on server

Browse files:
- app.py +39 -19
- requirements.txt +5 -1
app.py CHANGED
@@ -6,16 +6,36 @@ import cv2
 from flask import Flask, jsonify, render_template, request, send_file
 import base64
 import numpy as np
-import easyocr
+import pytesseract
 from huggingface_hub import InferenceClient
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

-
-reader = easyocr.Reader(['en'])
-app = Flask(__name__)
-client = InferenceClient(api_key=HF_API_KEY)
+model_name = "microsoft/Phi-3-mini-4k-instruct"
+
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    torch_dtype="auto",
+    device_map="auto"
+)
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+pipe = pipeline(
+    "text-generation",
+    model=model,
+    tokenizer=tokenizer,
+)
+
+generation_args = {
+    "max_new_tokens": 500,
+    "return_full_text": False,
+    "temperature": 0.0,
+    "do_sample": False,
+}
+
+app = Flask(__name__)
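The added startup block follows the Phi-3-mini-4k-instruct model card pattern: a chat-style messages list is handed straight to the text-generation pipeline, which applies the model's chat template before decoding. A minimal sketch of that templating step, useful when debugging prompt formatting (the example messages below are illustrative, not from the commit; it assumes the tokenizer loaded above):

    # Show the prompt string the pipeline actually feeds to the model
    # for a chat-style input.
    messages = [
        {"role": "system", "content": "You are a nutritional information summarizer."},
        {"role": "user", "content": "Oats: ENERGY 389 kcal, PROTEIN 16.9 g"},
    ]
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    print(prompt)  # Phi-3 wraps turns in <|system|>/<|user|> tags and ends with <|assistant|>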
@@ -28,6 +48,7 @@ def index():

 @app.route('/analyse', methods=['POST'])
 def analyse():
+    print("request", request.json.get('user_item_info'), "- done")
     frame = request.json.get('key')
     user_item_info = request.json.get('user_item_info')
     cv_frame = data_uri_to_image(frame)
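The new print is a quick request trace. If this handler needed hardening, a defensive variant (hypothetical, not in the commit) would avoid a 500 when the body is missing or is not JSON:

    # Hypothetical hardening of the /analyse input handling.
    data = request.get_json(silent=True) or {}  # silent=True returns None instead of raising
    frame = data.get('key')
    user_item_info = data.get('user_item_info', '')
    if frame is None:
        return jsonify({"error": "missing 'key' frame"}), 400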
@@ -35,16 +56,17 @@ def analyse():
     blur = cv2.bilateralFilter(gray_image,4,20,20)
     clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))  # contrast-limited adaptive histogram equalization
     equalized = clahe.apply(blur)
-
-    # Optional: Apply thresholding or other preprocessing
-    _, thresh_image = cv2.threshold(equalized, 150, 255, cv2.THRESH_BINARY)
+    print("preprocess - done")
+    # Optional: Apply thresholding or other preprocessing technique

     # Use pytesseract to extract text
-    results = reader.readtext(equalized)
+    '''results = reader.readtext(equalized)
     results_array =[]
     for (bbox, text, prob) in results:
-        results_array.append(f"[{text} (Confidence: {prob})]")
-
+        results_array.append(f"[{text} (Confidence: {prob})]")'''
+    custom_config = r'--psm 11'
+    text = pytesseract.image_to_string(equalized, config=custom_config)
+    print("OCR", text, " - done")
     # Iterate through the results and format them
     #cv2.imshow("threash", thresh_image)
     #cv2.imshow("clahe", equalized)
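Tesseract's --psm 11 mode treats the image as sparse text, which suits scattered label fragments, but image_to_string drops the per-word confidences the commented-out easyocr loop used to report. If those are still wanted, image_to_data exposes them; a sketch, not part of the commit, reusing the `equalized` frame from the diff above:

    # Recover per-word confidences from Tesseract, mirroring the old
    # easyocr results_array format.
    data = pytesseract.image_to_data(
        equalized, config=r'--psm 11', output_type=pytesseract.Output.DICT
    )
    results_array = [
        f"[{word} (Confidence: {conf})]"
        for word, conf in zip(data["text"], data["conf"])
        if word.strip() and float(conf) >= 0  # conf -1 marks non-text blocks
    ]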
@@ -54,15 +76,13 @@ def analyse():
     item=user_item_info
     messages = [
         { "role": "system", "content": "You are a helpful nutritional information summarizer who is being used in a live application. You will be given the output of an OCR stage which has detected the text on a particular packaged food's label. You are tasked to give a comprehensive summary of the nutritional value, and ingredients if present. Comment on the health of the product.\nIf there are words you do not recognize, make educated guesses. At no point shall you refer to the inputted text or make users aware of any mistakes in character recognition. DO NOT REFER TO INPUT TEXT OR OCR " },
-        { "role": "user", "content": f"{item}: {results_array}" }
+        { "role": "user", "content": f"{item}: {text}" }
     ]

-    completion = client.chat.completions.create(
-        model=...,
-        messages=messages,
-        max_tokens=...,
-    )
-    return str(completion.choices[0].message.content)
+    output = pipe(messages, **generation_args)
+
+    print("LLM", output[0]['generated_text'], " - done")
+    return output[0]['generated_text']


 def data_uri_to_image(data_uri):
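With "do_sample": False the pipeline decodes greedily, so the "temperature": 0.0 entry has no effect on its own; the route then returns the generated text as a bare string. A hedged alternative (not what the commit does) that keeps the response machine-readable via the already-imported jsonify:

    # Hypothetical JSON response for /analyse.
    output = pipe(messages, **generation_args)
    summary = output[0]["generated_text"]
    return jsonify({"summary": summary})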
@@ -85,4 +105,4 @@ def image_to_data_uri(image):


 if __name__ == "__main__":
-    app.run(host="0.0.0.0", port=7860)
+    app.run(host="0.0.0.0", port=7860, ssl_context=("cert.pem", "key.pem"))
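The new run line expects a cert.pem/key.pem pair in the working directory, which the commit does not add. For local testing, Werkzeug can instead mint a throwaway certificate (a sketch, assuming the pyOpenSSL/cryptography extras are installed; browsers will still warn about the self-signed cert):

    # Hypothetical local-testing variant with an ad-hoc certificate.
    if __name__ == "__main__":
        app.run(host="0.0.0.0", port=7860, ssl_context="adhoc")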
requirements.txt CHANGED
@@ -2,4 +2,8 @@ opencv-python-headless == 4.10.0.84
 numpy == 1.23.3
 Flask == 3.0.3
 Flask-SocketIO == 5.4.1
-easyocr
+flash_attn==2.5.8
+torch==2.3.1
+accelerate==0.31.0
+transformers==4.41.2
+pytesseract
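Note that pytesseract is only a thin wrapper: the tesseract binary itself must exist on the image, and pip cannot provide it. On Hugging Face Spaces that usually means a packages.txt entry such as tesseract-ocr (an assumption here; the commit does not add one). A small startup check:

    # Fail fast if the system tesseract binary is missing (hypothetical check).
    import shutil
    import pytesseract

    if shutil.which("tesseract") is None:
        raise RuntimeError("tesseract binary not found; add tesseract-ocr to packages.txt")
    print("tesseract", pytesseract.get_tesseract_version())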