import base64
import io
import os
import tempfile  # NOTE(review): unused in this file — kept in case another module relies on it
import fitz  # PyMuPDF
import cv2
import gradio as gr
import requests
import openai  # NOTE(review): unused — requests is used for the API call instead
from PIL import Image

# Placeholder text shown in the chat input box.
prompt = "Type and press Enter"


def pdf_to_img(pdf_path):
    """Extract every embedded image from the PDF at *pdf_path*.

    Each image is written to the working directory as ``image<N>.<ext>``.

    Returns:
        list[str]: the filenames written, in page order.
    """
    pdf_document = fitz.open(pdf_path)
    counter = 1
    img_list = []
    try:
        for page_number in range(len(pdf_document)):
            page = pdf_document[page_number]
            for image in page.get_images():
                # image[0] is the xref of the embedded image object.
                base_img = pdf_document.extract_image(image[0])
                img = Image.open(io.BytesIO(base_img["image"]))
                out_name = f"image{counter}.{base_img['ext']}"
                # Save by filename so PIL opens and closes the file itself
                # (the original passed an open handle that was never closed).
                img.save(out_name)
                img_list.append(out_name)
                counter += 1
    finally:
        pdf_document.close()
    return img_list


def concate_image(img_input):
    """Vertically concatenate up to two images named in *img_input*.

    Args:
        img_input: comma-separated filenames, possibly with spaces/quotes
            (as produced by the gallery-selection textbox).

    Returns:
        str: the single filename if only one was given, otherwise the path
        of the merged image (``merge_image.jpg``).
    """
    img_input = img_input.replace(" ", "").replace("'", "")
    img_name_lst = img_input.split(",")
    if len(img_name_lst) == 1:
        return img_name_lst[0]
    img1 = cv2.imread(img_name_lst[0])
    img2 = cv2.imread(img_name_lst[1])
    # cv2 arrays are (height, width, channels); cv2.resize takes (width, height).
    target_height, target_width = img1.shape[0], img1.shape[1]
    img2 = cv2.resize(img2, (target_width, target_height))
    im_v = cv2.vconcat([img1, img2])
    cv2.imwrite("merge_image.jpg", im_v)
    return "merge_image.jpg"


def encode_image(image_path):
    """Merge the selected image(s) and return the result base64-encoded."""
    file_path = concate_image(image_path)
    with open(file_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")


def api_calling(image, prompt, api_key):
    """Send *image* plus *prompt* to the OpenAI vision chat endpoint.

    Args:
        image: filename(s) of the image(s) to compare (comma-separated).
        prompt: user question; a default comparison question is used if empty.
        api_key: OpenAI API key (sent as a Bearer token).

    Returns:
        str: the model's reply text.
    """
    base64_image = encode_image(image)
    if len(prompt) == 0:
        prompt = "Compare the two graphs shown in the image and list the differences?"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    payload = {
        "model": "gpt-4-vision-preview",
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
                    },
                ],
            }
        ],
        "max_tokens": 500,
    }
    response = requests.post(
        "https://api.openai.com/v1/chat/completions", headers=headers, json=payload
    )
    img_preview = response.json()
    return img_preview["choices"][0]["message"]["content"]


def message_and_history(img_input_name, input, history, api_key):
    """Chat callback: append the new exchange to *history* and return it twice
    (once for the Chatbot display, once for the State)."""
    history = history or []
    # Flatten prior (question, answer) pairs into one running context string.
    s = list(sum(history, ()))
    s.append(input)
    inp = " ".join(s)
    output = api_calling(img_input_name, inp, api_key)
    if len(input) == 0:
        # Show a readable label when the user submitted an empty prompt.
        input = "Difference between the graphs."
    history.append((input, output))
    return history, history


title = "Compare the Images"

with gr.Blocks(theme=gr.themes.Glass(primary_hue=gr.themes.colors.slate)) as demo:
    # NOTE(review): the original f-string here was mangled across lines;
    # reconstructed as a simple page header built from *title*.
    gr.Markdown(f"<center><h1>{title}</h1></center>")
    with gr.Row():
        with gr.Row():
            with gr.Column():
                file_input = gr.File(type="filepath", label="Upload .pdf file")
                upload_button = gr.Button(value="Show Images")
        img_gallery = gr.Gallery(
            label="Generated images",
            show_label=True,
            elem_id="gallery",
            object_fit="contain",
            height="auto",
            allow_preview=True,
        )
    with gr.Row():
        with gr.Column():
            image_name = gr.Textbox(label="Name of the Images to be Compared")

            # Mutable module-level state: the last two gallery selections.
            image_id_lst = []

            def get_select_index(evt: gr.SelectData):
                """Track the two most recently clicked gallery images and
                return their basenames as a comma-separated string."""
                selected_path = evt.value["image"]["path"]
                # basename is robust to temp-dir depth and OS path separators
                # (the original hard-coded split('/')[4]).
                selected_name = os.path.basename(selected_path)
                if len(image_id_lst) >= 2:
                    # Keep a sliding window of the last two selections.
                    image_id_lst[0] = image_id_lst[1]
                    image_id_lst[1] = selected_name
                else:
                    image_id_lst.append(selected_name)
                return ",".join(image_id_lst)

            img_gallery.select(get_select_index, None, image_name)
            show_button = gr.Button(value="Show Images")
        output_img = gr.Image(type="filepath", label="Selected Images")
    with gr.Row():
        with gr.Column():
            api_input = gr.Textbox(label="Enter Api-key")
            show_concat = gr.Button(value="Compare Images")
        chatbot = gr.Chatbot()
    message = gr.Textbox(label="User", placeholder=prompt)
    state = gr.State()

    upload_button.click(pdf_to_img, inputs=file_input, outputs=[img_gallery])
    show_button.click(concate_image, inputs=image_name, outputs=output_img)
    show_concat.click(
        message_and_history,
        inputs=[output_img, message, state, api_input],
        outputs=[chatbot, state],
    )
    message.submit(
        message_and_history,
        inputs=[output_img, message, state, api_input],
        outputs=[chatbot, state],
    )
    # Clear the input box after submit.
    message.submit(lambda: None, None, message, queue=False)

demo.launch()