jhonparra18 committed on
Commit
08f01a2
·
1 Parent(s): cdaaee8

app behaviour

Browse files
Files changed (3) hide show
  1. app.py +17 -17
  2. config.py +1 -1
  3. image_processor.py +26 -9
app.py CHANGED
@@ -4,24 +4,23 @@ import numpy as np
4
  from PIL import Image
5
  from app_utils import TEMP_DIR_NAME,save_uploaded_file,reset_chat
6
  import os
7
- import sys
8
  from text_summarizer import agent,processor
9
- import logging
10
 
11
- logging.basicConfig(level=logging.INFO)
12
 
13
- BOT_DEFAULT_MSG="Hello 👋 I'm a test AI assistant to help you with your questions about an input file, or feel free to ask me anything"
14
- st.set_page_config(page_title="Invoice | Receipt LLM Summarizer",layout='wide',page_icon=":shark:")
15
 
16
  #placeholders for temporal image path and an image processor in case we want to read img text separately
17
  IMAGE_TMP_PATH=None
18
  PROCESSOR=processor
19
  img_text=""
 
20
 
21
  with st.sidebar:
22
 
23
  st.markdown(
24
- f"<h1 style='text-align: center;'> Invoice|Receipt LLM Summarizer using OpenCV+Tesseract+LLM</h1><br><br>",
25
  unsafe_allow_html=True
26
  )
27
  input_image = st.file_uploader(label='Receipt|Invoice Image',help="Upload an image",type=['jpg','png','jpeg'])
@@ -46,7 +45,7 @@ with st.sidebar:
46
 
47
  # Initialize chat history based on streamlit doc for chat applications https://docs.streamlit.io/knowledge-base/tutorials/build-conversational-apps
48
  if "messages" not in st.session_state:
49
- st.session_state.messages = []
50
  st.session_state.messages.append({"role": "assistant", "content": BOT_DEFAULT_MSG})
51
 
52
 
@@ -55,22 +54,23 @@ for message in st.session_state.messages:
55
  with st.chat_message(message["role"]):
56
  st.markdown(message["content"])
57
 
 
 
 
58
 
59
- if prompt := st.chat_input("Write a message to the AI assistant | Escribe un mensaje para el asistente de IA"):
60
-
61
- st.chat_message("user").markdown(prompt)
62
  st.session_state.messages.append({"role": "user", "content": prompt})
63
-
64
- prompt_ad=f'{prompt}, img path: {IMAGE_TMP_PATH}' if (input_image is not None and not inject_text) else f'{prompt} text: {img_text}'
65
- logging.info(f'PROMPT: {prompt_ad}')
66
- ##streamlit callback https://python.langchain.com/docs/integrations/callbacks/streamlit
67
- st_callback = StreamlitCallbackHandler(st.container())
 
68
  #hotfix to errors
69
  try:
70
  response = agent.run(prompt_ad,callbacks=[st_callback])
71
  except ValueError as e:
72
  response = "Sorry i could't understand your last question."
73
- with st.chat_message("assistant"):
74
- st.markdown(response)
75
  # Add assistant response to chat history
76
  st.session_state.messages.append({"role": "assistant", "content": response})
 
 
4
  from PIL import Image
5
  from app_utils import TEMP_DIR_NAME,save_uploaded_file,reset_chat
6
  import os
7
+ from pathlib import Path
8
  from text_summarizer import agent,processor
 
9
 
10
+ BOT_DEFAULT_MSG="Hello 👋 I'm a test AI OCR assistant to help you with your questions about your receipts or similar images containing text. Also feel free to ask me anything"
11
 
12
+ st.set_page_config(page_title="OCR+LLM Image summarizer",layout='wide',page_icon=":shark:")
 
13
 
14
  #placeholders for temporal image path and an image processor in case we want to read img text separately
15
  IMAGE_TMP_PATH=None
16
  PROCESSOR=processor
17
  img_text=""
18
+ inject_text=False
19
 
20
  with st.sidebar:
21
 
22
  st.markdown(
23
+ f"<h1 style='text-align: center;'> Invoice|Receipt Summarizer using OpenCV+Tesseract+LLM</h1><br><br>",
24
  unsafe_allow_html=True
25
  )
26
  input_image = st.file_uploader(label='Receipt|Invoice Image',help="Upload an image",type=['jpg','png','jpeg'])
 
45
 
46
  # Initialize chat history based on streamlit doc for chat applications https://docs.streamlit.io/knowledge-base/tutorials/build-conversational-apps
47
  if "messages" not in st.session_state:
48
+ reset_chat()
49
  st.session_state.messages.append({"role": "assistant", "content": BOT_DEFAULT_MSG})
50
 
51
 
 
54
  with st.chat_message(message["role"]):
55
  st.markdown(message["content"])
56
 
57
+ prompt=st.chat_input("Write a message to the AI assistant | Escribe un mensaje para el asistente de IA")
58
+
59
+ if prompt:
60
 
 
 
 
61
  st.session_state.messages.append({"role": "user", "content": prompt})
62
+ st.chat_message("user").markdown(prompt)
63
+ prompt_ad=f'{prompt}, img path: {IMAGE_TMP_PATH}' if (input_image is not None and not inject_text) else (f'{prompt} text: {img_text}' if inject_text else prompt)
64
+ #streamlit callback https://python.langchain.com/docs/integrations/callbacks/streamlit
65
+ print(f'PROMPT: {prompt_ad}')
66
+ st_callback = StreamlitCallbackHandler(st.container())
67
+
68
  #hotfix to errors
69
  try:
70
  response = agent.run(prompt_ad,callbacks=[st_callback])
71
  except ValueError as e:
72
  response = "Sorry i could't understand your last question."
73
+
 
74
  # Add assistant response to chat history
75
  st.session_state.messages.append({"role": "assistant", "content": response})
76
+ st.chat_message("assistant").markdown(response)
config.py CHANGED
@@ -1,3 +1,3 @@
1
- PYTESSERACT_DEFAULT_CONFIG= r'--psm 4'
2
  OPEN_AI_MODEL_NAME="gpt-3.5-turbo-0613" #fine-tuned for function detection see https://python.langchain.com/docs/modules/agents/agent_types/openai_functions_agent
3
  DEBUG_MODE_LLM=False
 
1
+ PYTESSERACT_DEFAULT_CONFIG= r'--oem 3 --psm 4'
2
  OPEN_AI_MODEL_NAME="gpt-3.5-turbo-0613" #fine-tuned for function detection see https://python.langchain.com/docs/modules/agents/agent_types/openai_functions_agent
3
  DEBUG_MODE_LLM=False
image_processor.py CHANGED
@@ -7,14 +7,14 @@ import numpy as np
7
  from langchain.tools import BaseTool
8
  from typing import Optional, Type
9
  from langchain.callbacks.manager import AsyncCallbackManagerForToolRun
10
-
11
 
12
  class ImageProcessor(BaseTool):
13
 
14
  name = "ImageProcessor"
15
  description = "useful when you need to extract info from an image in an img_path corresponding to a receipt or invoice and tries to preprocess it returning all the text in the image using an OCR system."
16
 
17
- def binarize(self,img_path:str):
18
  """
19
  This function is to binarize an input image
20
  :param img: image in format of (h, w, channel)
@@ -32,17 +32,17 @@ class ImageProcessor(BaseTool):
32
  new = np.clip(new, 0, 255).astype(np.uint8)
33
  return new
34
 
35
- def deskew(self,image):
36
- coords = np.column_stack(np.where(image > 0))
37
  angle = cv2.minAreaRect(coords)[-1]
38
  if angle < -45:
39
  angle = -(90 + angle)
40
  else:
41
  angle = -angle
42
- (h, w) = image.shape[:2]
43
  center = (w // 2, h // 2)
44
  M = cv2.getRotationMatrix2D(center, angle, 1.0)
45
- rotated = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
46
  return rotated
47
 
48
  def dilate_erode(self,img):
@@ -60,11 +60,27 @@ class ImageProcessor(BaseTool):
60
  img= cv2.dilate(img, kernel2, iterations=1)
61
  return img
62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  def opening(self,image):
64
  kernel = np.ones((5,5),np.uint8)
65
  return cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel)
66
 
67
- def process_image(self,img_path:str):
68
  img=self.binarize(img_path)
69
  img=self.remove_watermark(img)
70
  return img
@@ -74,17 +90,18 @@ class ImageProcessor(BaseTool):
74
  return text
75
 
76
  def _run(self,img_path,save_to_disk=False):
77
- img=self.process_image(str(img_path))
78
  text=self.img_to_text(img)
79
  if save_to_disk:
80
  with open(f"/tmp/{str(img_path).split('/')[-1].replace('.jpg','.txt')}",'w') as f:
81
  f.write(text)
 
82
  return text
83
 
84
  # as used in langchain documentation https://python.langchain.com/docs/modules/agents/tools/custom_tools
85
  async def _arun(self, img_path: str,save_to_disk=False, run_manager: Optional[AsyncCallbackManagerForToolRun] = None
86
  ) -> str:
87
  """Use the tool asynchronously."""
88
- raise NotImplementedError("custom_search does not support async")
89
 
90
 
 
7
  from langchain.tools import BaseTool
8
  from typing import Optional, Type
9
  from langchain.callbacks.manager import AsyncCallbackManagerForToolRun
10
+ from PIL import Image
11
 
12
  class ImageProcessor(BaseTool):
13
 
14
  name = "ImageProcessor"
15
  description = "useful when you need to extract info from an image in an img_path corresponding to a receipt or invoice and tries to preprocess it returning all the text in the image using an OCR system."
16
 
17
+ def binarize(self,img_path):
18
  """
19
  This function is to binarize an input image
20
  :param img: image in format of (h, w, channel)
 
32
  new = np.clip(new, 0, 255).astype(np.uint8)
33
  return new
34
 
35
def deskew(self, img):
    """Rotate ``img`` so that its foreground content is axis-aligned.

    The skew angle is taken from the minimum-area rectangle enclosing every
    pixel whose value is greater than zero, and the image is rotated around
    its centre by the opposite angle.

    :param img: image array; pixels ``> 0`` are treated as foreground
    :return: rotated image with the same width and height as ``img``
    """
    # (row, col) coordinates of every foreground pixel.
    foreground = np.column_stack(np.where(img > 0))
    raw_angle = cv2.minAreaRect(foreground)[-1]
    # NOTE(review): the -45 threshold presumes minAreaRect reports angles in
    # [-90, 0); newer OpenCV releases may use a different range - confirm.
    correction = -(90 + raw_angle) if raw_angle < -45 else -raw_angle
    height, width = img.shape[:2]
    rotation = cv2.getRotationMatrix2D((width // 2, height // 2), correction, 1.0)
    return cv2.warpAffine(
        img,
        rotation,
        (width, height),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_REPLICATE,
    )
47
 
48
  def dilate_erode(self,img):
 
60
  img= cv2.dilate(img, kernel2, iterations=1)
61
  return img
62
 
63
+
64
def detect_angle(self, img_path):
    """Load an image and undo the rotation recorded in its EXIF data.

    Despite the name, no angle is estimated from the image content: the
    method only reads the EXIF Orientation tag (274) and applies the
    matching transpose. Images without that tag are returned unrotated.

    :param img_path: path or file object accepted by ``PIL.Image.open``
    :return: numpy array of shape (height, width, 3) in BGR channel order
    """
    # Adapted from https://stackoverflow.com/questions/13872331/rotating-an-image-with-orientation-specified-in-exif-using-python-without-pil-in
    orientation_tag = 274
    transpose_for_orientation = {
        3: Image.ROTATE_180,
        6: Image.ROTATE_270,
        8: Image.ROTATE_90,
    }
    with Image.open(img_path) as source:
        # .get() instead of indexing: EXIF data can be present without an
        # Orientation entry, which previously raised KeyError.
        orientation = source.getexif().get(orientation_tag)
        method = transpose_for_orientation.get(orientation)
        upright = source if method is None else source.transpose(method)
        # Force three RGB channels so the channel reversal below is valid
        # for grayscale, palette and RGBA inputs as well.
        rgb = np.array(upright.convert("RGB"))
    return rgb[:, :, ::-1]  # RGB -> BGR
78
+
79
  def opening(self,image):
80
  kernel = np.ones((5,5),np.uint8)
81
  return cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel)
82
 
83
def process_image(self, img_path):
    """Run the preprocessing pipeline on the image at ``img_path``.

    The image is binarized first and the result is then passed through
    watermark removal.

    :param img_path: image location, forwarded unchanged to ``binarize``
    :return: whatever ``remove_watermark`` returns for the binarized image
    """
    binarized = self.binarize(img_path)
    return self.remove_watermark(binarized)
 
90
  return text
91
 
92
  def _run(self,img_path,save_to_disk=False):
93
+ img=self.process_image(img_path)
94
  text=self.img_to_text(img)
95
  if save_to_disk:
96
  with open(f"/tmp/{str(img_path).split('/')[-1].replace('.jpg','.txt')}",'w') as f:
97
  f.write(text)
98
+ cv2.imwrite(f"images/rotated-{img_pth.name}",img)
99
  return text
100
 
101
  # as used in langchain documentation https://python.langchain.com/docs/modules/agents/tools/custom_tools
102
  async def _arun(self, img_path: str,save_to_disk=False, run_manager: Optional[AsyncCallbackManagerForToolRun] = None
103
  ) -> str:
104
  """Use the tool asynchronously."""
105
+ raise NotImplementedError("does not support async")
106
 
107