HuggingFace-SK committed
Commit 7194e59 · Parent: 3e9e64f

try llms on server

Files changed (2):
  1. app.py +39 -19
  2. requirements.txt +5 -1
app.py CHANGED
@@ -6,16 +6,36 @@ import cv2
 from flask import Flask, jsonify, render_template, request, send_file
 import base64
 import numpy as np
-import easyocr
+import pytesseract
 from huggingface_hub import InferenceClient
+from transformers import AutoModelForCausalLM, AutoTokenizer
 
-app = Flask(__name__)
+model_name = "microsoft/Phi-3-mini-4k-instruct"
+
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    torch_dtype="auto",
+    device_map="auto"
+)
+tokenizer = AutoTokenizer.from_pretrained(model_name)
 
-reader = easyocr.Reader(['en'])
 
-HF_API_KEY=os.getenv("HF_API_KEY")
+pipe = pipeline(
+    "text-generation",
+    model=model,
+    tokenizer=tokenizer,
+)
+
+generation_args = {
+    "max_new_tokens": 500,
+    "return_full_text": False,
+    "temperature": 0.0,
+    "do_sample": False,
+}
+
+app = Flask(__name__)
+
 
-client = InferenceClient(api_key=HF_API_KEY)
 
 
 
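Note: the new startup code calls pipeline(...), but the import added above only brings in AutoModelForCausalLM and AutoTokenizer, so the module will fail at import time with NameError: name 'pipeline' is not defined. A one-line fix, assuming transformers is the intended source (it is where this helper lives):

    from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline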
 
@@ -28,6 +48,7 @@ def index():
 
 @app.route('/analyse', methods=['POST'])
 def analyse():
+    print("request", request.json.get('user_item_info'), "- done")
     frame = request.json.get('key')
     user_item_info = request.json.get('user_item_info')
     cv_frame = data_uri_to_image(frame)
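Note: request.json assumes the client sends Content-Type: application/json and errors out on anything else. If that guarantee is shaky, a more defensive sketch (not part of this commit) would be:

    data = request.get_json(silent=True) or {}   # None -> {} when the body is not JSON
    frame = data.get('key')
    user_item_info = data.get('user_item_info')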
 
@@ -35,16 +56,17 @@ def analyse():
     blur = cv2.bilateralFilter(gray_image,4,20,20)
     clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)) # histograme of distributed pixel values
     equalized = clahe.apply(blur)
-
-    # Optional: Apply thresholding or other preprocessing techniques
-    _, thresh_image = cv2.threshold(equalized, 150, 255, cv2.THRESH_BINARY)
+    print("preprocess - done")
+    # Optional: Apply thresholding or other preprocessing technique
 
     # Use pytesseract to extract text
-    results = reader.readtext(equalized)
+    '''results = reader.readtext(equalized)
     results_array =[]
     for (bbox, text, prob) in results:
-        results_array.append(f"[{text} (Confidence: {prob})]")
-
+        results_array.append(f"[{text} (Confidence: {prob})]")'''
+    custom_config = r'--psm 11'
+    text = pytesseract.image_to_string(equalized, config=custom_config)
+    print("OCR", text, " - done")
     # Iterate through the results and format them
     #cv2.imshow("threash", thresh_image)
     #cv2.imshow("clahe", equalized)
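Note: --psm 11 selects Tesseract's "sparse text" page-segmentation mode, a reasonable default for scattered label text. If the labels turn out to be mostly uniform blocks, a variant worth trying (a sketch to tune on real frames, not something this commit tested) is:

    custom_config = r'--oem 3 --psm 6'   # default OCR engine, assume one uniform text block
    text = pytesseract.image_to_string(equalized, config=custom_config)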
 
@@ -54,15 +76,13 @@ def analyse():
     item=user_item_info
     messages = [
         { "role": "system", "content": "You are a helpful nutritional information summarizer who is being used in a live application. You will be given the output of an OCR stage which has detected the text on a particular packaged food's label. You are tasked to give a comprehensive summary of the nutritional value, and ingredients if present. Comment on the health of the product.\nIf there are words you do not recognize, make educated guesses. At no point shall you refer to the inputted text or make users aware of any mistakes in character recognition. DO NOT REFER TO INPUT TEXT OR OCR " },
-        { "role": "user", "content": f"{item}: {results_array}" }
+        { "role": "user", "content": f"{item}: {text}" }
     ]
 
-    completion = client.chat.completions.create(
-        model="Qwen/Qwen2.5-Coder-32B-Instruct",
-        messages=messages,
-        max_tokens=500
-    )
-    return str(completion.choices[0].message.content)
+    output = pipe(messages, **generation_args)
+
+    print("LLM", output[0]['generated_text'], " - done")
+    return output[0]['generated_text']
 
 
 def data_uri_to_image(data_uri):
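Note: since generation_args sets do_sample=False, decoding is greedy and the "temperature": 0.0 entry is ignored (recent transformers versions emit a warning about it), so it can simply be dropped:

    generation_args = {
        "max_new_tokens": 500,
        "return_full_text": False,   # output[0]['generated_text'] holds only the new reply
        "do_sample": False,
    }

With return_full_text=False the route returns just the assistant's generated text rather than the echoed prompt.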
 
@@ -85,4 +105,4 @@ def image_to_data_uri(image):
 
 
 if __name__ == "__main__":
-    app.run(host="0.0.0.0", port=7860)
+    app.run(host="0.0.0.0", port=7860, ssl_context=("cert.pem", "key.pem"))
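Note: ssl_context=("cert.pem", "key.pem") expects both files to exist in the working directory. For quick local testing, Werkzeug also accepts an ad-hoc self-signed certificate (requires the cryptography package; browsers will still warn):

    app.run(host="0.0.0.0", port=7860, ssl_context="adhoc")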
requirements.txt CHANGED
@@ -2,4 +2,8 @@ opencv-python-headless == 4.10.0.84
 numpy == 1.23.3
 Flask == 3.0.3
 Flask-SocketIO == 5.4.1
-easyocr == 1.7.2
+flash_attn==2.5.8
+torch==2.3.1
+accelerate==0.31.0
+transformers==4.41.2
+pytesseract
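Note: pytesseract is only a Python wrapper; the tesseract binary itself must be installed on the server (on Hugging Face Spaces, typically by listing tesseract-ocr in a packages.txt file). A quick startup check, as a sketch:

    import pytesseract
    pytesseract.get_tesseract_version()   # raises TesseractNotFoundError if the binary is absent

Also, flash_attn==2.5.8 compiles against CUDA and will typically fail to install on a CPU-only Space; Phi-3-mini can still load without it as long as no flash-attention implementation is requested.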