app updated
Browse files
- app.py +4 -0
- utils/cnn_transformer.py +5 -6
- utils/helpers.py +32 -29
app.py
CHANGED
|
@@ -156,6 +156,10 @@ def main():
|
|
| 156 |
{"Key": keys, "Values": values}
|
| 157 |
)
|
| 158 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 159 |
# Convert DataFrame to CSV
|
| 160 |
csv = sample_payload_output.to_csv(index=False)
|
| 161 |
|
|
|
|
| 156 |
{"Key": keys, "Values": values}
|
| 157 |
)
|
| 158 |
|
| 159 |
+
# Display table
|
| 160 |
+
with st.expander("Inspect table (before download)"):
|
| 161 |
+
st.table(sample_payload_output)
|
| 162 |
+
|
| 163 |
# Convert DataFrame to CSV
|
| 164 |
csv = sample_payload_output.to_csv(index=False)
|
| 165 |
|
utils/cnn_transformer.py
CHANGED
|
@@ -3,11 +3,11 @@ import os
|
|
| 3 |
os.environ["KERAS_BACKEND"] = "tensorflow"
|
| 4 |
|
| 5 |
import re
|
| 6 |
-
import numpy as np
|
| 7 |
-
import matplotlib.pyplot as plt
|
| 8 |
|
| 9 |
-
import tensorflow as tf
|
| 10 |
import keras
|
|
|
|
|
|
|
|
|
|
| 11 |
from keras import layers
|
| 12 |
from keras.applications import efficientnet
|
| 13 |
from keras.layers import TextVectorization
|
|
@@ -319,7 +319,6 @@ class ImageCaptioningModel(keras.Model):
|
|
| 319 |
return [self.loss_tracker, self.acc_tracker]
|
| 320 |
|
| 321 |
|
| 322 |
-
|
| 323 |
strip_chars = "!\"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"
|
| 324 |
strip_chars = strip_chars.replace("<", "")
|
| 325 |
strip_chars = strip_chars.replace(">", "")
|
|
@@ -350,7 +349,7 @@ def generate_caption(caption_model: None):
|
|
| 350 |
|
| 351 |
# Pass the image to the CNN
|
| 352 |
# img = tf.expand_dims(sample_img, 0)
|
| 353 |
-
#
|
| 354 |
img = None
|
| 355 |
img = caption_model.cnn_model(img)
|
| 356 |
|
|
@@ -376,4 +375,4 @@ def generate_caption(caption_model: None):
|
|
| 376 |
|
| 377 |
decoded_caption = decoded_caption.replace("<start> ", "")
|
| 378 |
decoded_caption = decoded_caption.replace(" <end>", "").strip()
|
| 379 |
-
print("Predicted Caption: ", decoded_caption)
|
|
|
|
| 3 |
os.environ["KERAS_BACKEND"] = "tensorflow"
|
| 4 |
|
| 5 |
import re
|
|
|
|
|
|
|
| 6 |
|
|
|
|
| 7 |
import keras
|
| 8 |
+
import matplotlib.pyplot as plt
|
| 9 |
+
import numpy as np
|
| 10 |
+
import tensorflow as tf
|
| 11 |
from keras import layers
|
| 12 |
from keras.applications import efficientnet
|
| 13 |
from keras.layers import TextVectorization
|
|
|
|
| 319 |
return [self.loss_tracker, self.acc_tracker]
|
| 320 |
|
| 321 |
|
|
|
|
| 322 |
strip_chars = "!\"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"
|
| 323 |
strip_chars = strip_chars.replace("<", "")
|
| 324 |
strip_chars = strip_chars.replace(">", "")
|
|
|
|
| 349 |
|
| 350 |
# Pass the image to the CNN
|
| 351 |
# img = tf.expand_dims(sample_img, 0)
|
| 352 |
+
# TODO
|
| 353 |
img = None
|
| 354 |
img = caption_model.cnn_model(img)
|
| 355 |
|
|
|
|
| 375 |
|
| 376 |
decoded_caption = decoded_caption.replace("<start> ", "")
|
| 377 |
decoded_caption = decoded_caption.replace(" <end>", "").strip()
|
| 378 |
+
print("Predicted Caption: ", decoded_caption)
|
utils/helpers.py
CHANGED
|
@@ -4,22 +4,20 @@ import json
|
|
| 4 |
import os
|
| 5 |
from typing import Any, Dict, List
|
| 6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
import pandas as pd
|
| 8 |
import requests
|
| 9 |
import streamlit as st
|
| 10 |
-
from
|
| 11 |
-
import google.generativeai as palm
|
| 12 |
-
from pypdf import PdfReader
|
| 13 |
from langchain.text_splitter import (
|
| 14 |
RecursiveCharacterTextSplitter,
|
| 15 |
SentenceTransformersTokenTextSplitter,
|
| 16 |
)
|
| 17 |
-
import
|
| 18 |
-
|
| 19 |
-
import streamlit as st
|
| 20 |
-
import chromadb
|
| 21 |
-
from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
|
| 22 |
-
|
| 23 |
|
| 24 |
# API Key (You should set this in your environment variables)
|
| 25 |
# api_key = st.secrets["PALM_API_KEY"]
|
|
@@ -187,10 +185,10 @@ def displayPDF(file: str) -> None:
|
|
| 187 |
# Opening the PDF file in binary read mode
|
| 188 |
with open(file, "rb") as f:
|
| 189 |
# Encoding the PDF file content to base64
|
| 190 |
-
base64_pdf: str = base64.b64encode(f.read()).decode(
|
| 191 |
|
| 192 |
# Creating an HTML embed string for displaying the PDF
|
| 193 |
-
pdf_display: str =
|
| 194 |
|
| 195 |
# Using Streamlit to display the HTML embed string as unsafe HTML
|
| 196 |
st.markdown(pdf_display, unsafe_allow_html=True)
|
|
@@ -199,16 +197,16 @@ def displayPDF(file: str) -> None:
|
|
| 199 |
def draw_boxes(image: Any, predictions: List[Dict[str, Any]]) -> Any:
|
| 200 |
"""
|
| 201 |
Draws bounding boxes and labels onto an image based on provided predictions.
|
| 202 |
-
|
| 203 |
Parameters:
|
| 204 |
- image (Any): The image to annotate, which should support the PIL drawing interface.
|
| 205 |
- predictions (List[Dict[str, Any]]): A list of predictions where each prediction is a dictionary
|
| 206 |
-
containing 'label', 'score', and 'box' keys. The 'box' is another dictionary with 'xmin',
|
| 207 |
'ymin', 'xmax', and 'ymax' as keys representing coordinates for the bounding box.
|
| 208 |
-
|
| 209 |
Returns:
|
| 210 |
- Any: The annotated image with bounding boxes and labels drawn on it.
|
| 211 |
-
|
| 212 |
Note:
|
| 213 |
- This function assumes that the incoming image supports the PIL ImageDraw interface.
|
| 214 |
- The function directly modifies the input image and returns it.
|
|
@@ -235,7 +233,9 @@ def draw_boxes(image: Any, predictions: List[Dict[str, Any]]) -> Any:
|
|
| 235 |
return image
|
| 236 |
|
| 237 |
|
| 238 |
-
def draw_bounding_boxes_for_textract(
|
|
|
|
|
|
|
| 239 |
"""
|
| 240 |
Draws bounding boxes on an image based on the provided JSON data from Textract.
|
| 241 |
|
|
@@ -248,31 +248,34 @@ def draw_bounding_boxes_for_textract(image: Image.Image, json_data: Dict[str, An
|
|
| 248 |
"""
|
| 249 |
# Load the image from the provided path
|
| 250 |
draw = ImageDraw.Draw(image)
|
| 251 |
-
|
| 252 |
# Parse the JSON data
|
| 253 |
try:
|
| 254 |
data = json_data
|
| 255 |
-
blocks = json.loads(data[
|
| 256 |
except json.JSONDecodeError:
|
| 257 |
-
st.error(
|
| 258 |
return image
|
| 259 |
-
|
| 260 |
if blocks is None:
|
| 261 |
-
st.error(
|
| 262 |
return image
|
| 263 |
-
|
| 264 |
# Iterate through the elements to find bounding boxes and draw them
|
| 265 |
for item in blocks:
|
| 266 |
-
if
|
| 267 |
-
bbox = item[
|
| 268 |
# Extract coordinates and dimensions
|
| 269 |
-
left, top, width, height =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 270 |
# Calculate bounding box coordinates in image space
|
| 271 |
left_top = (left * image.width, top * image.height)
|
| 272 |
right_bottom = ((left + width) * image.width, (top + height) * image.height)
|
| 273 |
# Draw rectangle
|
| 274 |
-
draw.rectangle([left_top, right_bottom], outline=
|
| 275 |
-
|
| 276 |
-
return image
|
| 277 |
-
|
| 278 |
|
|
|
|
|
|
| 4 |
import os
|
| 5 |
from typing import Any, Dict, List
|
| 6 |
|
| 7 |
+
import chromadb
|
| 8 |
+
import google.generativeai as palm
|
| 9 |
+
import matplotlib.patches as patches
|
| 10 |
+
import matplotlib.pyplot as plt
|
| 11 |
import pandas as pd
|
| 12 |
import requests
|
| 13 |
import streamlit as st
|
| 14 |
+
from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
|
|
|
|
|
|
|
| 15 |
from langchain.text_splitter import (
|
| 16 |
RecursiveCharacterTextSplitter,
|
| 17 |
SentenceTransformersTokenTextSplitter,
|
| 18 |
)
|
| 19 |
+
from PIL import Image, ImageDraw, ImageFont
|
| 20 |
+
from pypdf import PdfReader
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
# API Key (You should set this in your environment variables)
|
| 23 |
# api_key = st.secrets["PALM_API_KEY"]
|
|
|
|
| 185 |
# Opening the PDF file in binary read mode
|
| 186 |
with open(file, "rb") as f:
|
| 187 |
# Encoding the PDF file content to base64
|
| 188 |
+
base64_pdf: str = base64.b64encode(f.read()).decode("utf-8")
|
| 189 |
|
| 190 |
# Creating an HTML embed string for displaying the PDF
|
| 191 |
+
pdf_display: str = f'<embed src="data:application/pdf;base64,{base64_pdf}" width="700" height="1000" type="application/pdf">'
|
| 192 |
|
| 193 |
# Using Streamlit to display the HTML embed string as unsafe HTML
|
| 194 |
st.markdown(pdf_display, unsafe_allow_html=True)
|
|
|
|
| 197 |
def draw_boxes(image: Any, predictions: List[Dict[str, Any]]) -> Any:
|
| 198 |
"""
|
| 199 |
Draws bounding boxes and labels onto an image based on provided predictions.
|
| 200 |
+
|
| 201 |
Parameters:
|
| 202 |
- image (Any): The image to annotate, which should support the PIL drawing interface.
|
| 203 |
- predictions (List[Dict[str, Any]]): A list of predictions where each prediction is a dictionary
|
| 204 |
+
containing 'label', 'score', and 'box' keys. The 'box' is another dictionary with 'xmin',
|
| 205 |
'ymin', 'xmax', and 'ymax' as keys representing coordinates for the bounding box.
|
| 206 |
+
|
| 207 |
Returns:
|
| 208 |
- Any: The annotated image with bounding boxes and labels drawn on it.
|
| 209 |
+
|
| 210 |
Note:
|
| 211 |
- This function assumes that the incoming image supports the PIL ImageDraw interface.
|
| 212 |
- The function directly modifies the input image and returns it.
|
|
|
|
| 233 |
return image
|
| 234 |
|
| 235 |
|
| 236 |
+
def draw_bounding_boxes_for_textract(
|
| 237 |
+
image: Image.Image, json_data: Dict[str, Any]
|
| 238 |
+
) -> Image.Image:
|
| 239 |
"""
|
| 240 |
Draws bounding boxes on an image based on the provided JSON data from Textract.
|
| 241 |
|
|
|
|
| 248 |
"""
|
| 249 |
# Load the image from the provided path
|
| 250 |
draw = ImageDraw.Draw(image)
|
| 251 |
+
|
| 252 |
# Parse the JSON data
|
| 253 |
try:
|
| 254 |
data = json_data
|
| 255 |
+
blocks = json.loads(data["body"]) if "body" in data else None
|
| 256 |
except json.JSONDecodeError:
|
| 257 |
+
st.error("Invalid JSON data.")
|
| 258 |
return image
|
| 259 |
+
|
| 260 |
if blocks is None:
|
| 261 |
+
st.error("No bounding box data found.")
|
| 262 |
return image
|
| 263 |
+
|
| 264 |
# Iterate through the elements to find bounding boxes and draw them
|
| 265 |
for item in blocks:
|
| 266 |
+
if "BlockType" in item and item["BlockType"] in ["LINE", "WORD"]:
|
| 267 |
+
bbox = item["Geometry"]["BoundingBox"]
|
| 268 |
# Extract coordinates and dimensions
|
| 269 |
+
left, top, width, height = (
|
| 270 |
+
bbox["Left"],
|
| 271 |
+
bbox["Top"],
|
| 272 |
+
bbox["Width"],
|
| 273 |
+
bbox["Height"],
|
| 274 |
+
)
|
| 275 |
# Calculate bounding box coordinates in image space
|
| 276 |
left_top = (left * image.width, top * image.height)
|
| 277 |
right_bottom = ((left + width) * image.width, (top + height) * image.height)
|
| 278 |
# Draw rectangle
|
| 279 |
+
draw.rectangle([left_top, right_bottom], outline="red", width=2)
|
|
|
|
|
|
|
|
|
|
| 280 |
|
| 281 |
+
return image
|