Spaces:
Sleeping
Sleeping
File size: 5,206 Bytes
bed1057 53b8f4b 28e97a7 bed1057 0cd5480 bed1057 28e97a7 bed1057 0758797 bed1057 511942b bed1057 511942b e384767 b7a3caf 28e97a7 bed1057 b7a3caf 28e97a7 53b8f4b 28e97a7 b7a3caf 28e97a7 b7a3caf 53b8f4b b7a3caf 8f6fd08 511942b d471962 511942b bed1057 b7a3caf bed1057 b7a3caf bed1057 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 |
import streamlit as st
from PIL import Image
import os
import base64
import fitz # PyMuPDF
from helper import (
custom_file_uploader, resize_image, convert_image_to_base64, post_request_and_parse_response,
draw_bounding_boxes_for_textract, extract_text_from_textract_blocks, ChatGPTClient
)
import io
# --- Configuration -------------------------------------------------------
# Credentials and endpoints come from the environment. A missing variable
# raises KeyError immediately — intentional fail-fast at startup.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
TEXTRACT_API_URL = os.environ["TEXTRACT_API_URL"]

st.set_page_config(page_title="💬 Chat with OCR 📝", layout="wide")

# Chat history lives in session state so it survives Streamlit reruns.
st.session_state.setdefault("messages", [])
# --- Sidebar: upload, OCR, and result display ----------------------------
with st.sidebar:
    st.title("🖼️ Upload and Display Images")
    st.warning("Please upload an image or a single-page PDF file!")
    # FIX: the original passed type=['PDF'], which made the uploader reject
    # image files even though the else-branch below explicitly handles them
    # (and the warning above promises image support).
    uploaded_file = st.file_uploader(
        "Upload an Image or PDF",
        type=["png", "jpg", "jpeg", "pdf"],
        label_visibility="collapsed",
    )

    pil_image = None
    if uploaded_file:
        if uploaded_file.type == "application/pdf":
            # Render the first (and only) page of the PDF to a PIL image.
            try:
                pdf_bytes = uploaded_file.read()
                pdf_document = fitz.open(stream=pdf_bytes, filetype="pdf")
                if pdf_document.page_count != 1:
                    st.warning("Please upload a PDF with only one page!")
                else:
                    page = pdf_document.load_page(0)
                    pix = page.get_pixmap()
                    image_bytes = pix.tobytes()  # PNG-encoded by default
                    pil_image = Image.open(io.BytesIO(image_bytes))
            except Exception as e:
                # Surface conversion failures in the UI instead of crashing.
                st.error(f"Failed to convert PDF to image: {e}")
        else:
            # Any non-PDF upload is treated as an image file.
            pil_image = Image.open(uploaded_file)

    if pil_image:
        resized_image = resize_image(pil_image)
        with st.expander("Original Image", expanded=False):
            st.image(pil_image, caption="Uploaded Image", use_column_width=True)

        # OCR: ship the resized image to the Textract endpoint as base64.
        image_base64 = convert_image_to_base64(resized_image)
        payload = {"image": image_base64}
        result_dict = post_request_and_parse_response(TEXTRACT_API_URL, payload)

        # Overlay Textract's bounding boxes on a copy, keeping the original intact.
        image_with_boxes = draw_bounding_boxes_for_textract(resized_image.copy(), result_dict)
        with st.expander("Image with Bounding Boxes", expanded=True):
            st.image(image_with_boxes, caption="Image with Bounding Boxes", use_column_width=True)

        # Flatten Textract's block structure into plain text for the chat context.
        cleaned_up_body = extract_text_from_textract_blocks(result_dict['body'])

        # Raw JSON response, collapsed by default.
        with st.expander("View JSON Body", expanded=False):
            st.json(result_dict)
        # Extracted text, expanded by default so the user can sanity-check it.
        with st.expander("View Cleaned-up Text", expanded=True):
            st.text_area("Cleaned-up Text", cleaned_up_body, height=200, key="cleaned_text_area", help="Drag the bottom right corner to resize")
# Spacer so the "Clear Session" button sits toward the bottom of the sidebar.
st.sidebar.markdown("<br><br><br><br>", unsafe_allow_html=True)

# Wipe the stored conversation when the user asks for a fresh start.
clear_requested = st.sidebar.button("Clear Session")
if clear_requested:
    st.session_state.messages = []
# --- Main chat area ------------------------------------------------------
st.title("Chat with OCR Output")

# Replay the stored conversation so it persists across Streamlit reruns.
for past in st.session_state.messages:
    role, content = past["role"], past["content"]
    with st.chat_message(role):
        st.markdown(content)
# Build the ChatGPT client only once an image has actually been processed.
# Its history is seeded with the stored conversation plus the OCR text.
if uploaded_file and pil_image:
    seeded_history = list(st.session_state.messages)
    if cleaned_up_body:
        # NOTE(review): the OCR text is appended at the END of the history as a
        # system message — confirm ChatGPTClient expects it there, not first.
        seeded_history.append({"role": "system", "content": cleaned_up_body})
    bot = ChatGPTClient(
        api_key=OPENAI_API_KEY,
        protocol="You are fed with the text portion of json file that come out of OCR after scanning an image. User will ask you questions about this json file.",
        body=cleaned_up_body,
    )
    # Hand the combined context to the client.
    bot.history = seeded_history
# Handle a new message from the chat input box.
if prompt := st.chat_input("Ask me about the image"):
    # Echo the user's message and persist it in the session history.
    st.chat_message("user").markdown(prompt)
    st.session_state.messages.append({"role": "user", "content": prompt})

    # Only answer via the bot when an image was uploaded and processed;
    # otherwise prompt the user to upload one first.
    answer = (
        bot.generate_response(prompt)
        if uploaded_file and pil_image
        else "Please upload an image before asking questions."
    )

    # Show and persist the assistant's reply.
    st.chat_message("assistant").markdown(answer)
    st.session_state.messages.append({"role": "assistant", "content": answer})
|