import fitz import io import base64 from PIL import Image import gradio as gr import cv2 import requests import tempfile import os import openai prompt = "Type and press Enter" def pdf_to_img(pdf_path): pdf_document = fitz.open(pdf_path) counter = 1 img_list = [] for page_number in range(len(pdf_document)): page = pdf_document[page_number] image_list = page.get_images() for image in image_list: base_img = pdf_document.extract_image(image[0]) image_data = base_img["image"] img = Image.open(io.BytesIO(image_data)) extention = base_img['ext'] img.save(open(f"image{counter}.{extention}","wb")) img_list.append(f"image{counter}.{extention}") counter += 1 # img_list = [( # ( # img_list # ), f"label {i}") # for i in range(len(img_list)) # ] return (img_list) # def show_images(image_name): # return image_name def concate_image(img_input): img_input = img_input.replace(' ','') img_input = img_input.replace("'","") img_name_lst = img_input.split(',') if len(img_name_lst) == 1 : return img_name_lst[0] else: img1 = cv2.imread(img_name_lst[0]) img2 = cv2.imread(img_name_lst[1]) target_width = img1.shape[0] target_height = img1.shape[1] img2 = cv2.resize(img2,(target_height,target_width)) im_v = cv2.vconcat([img1, img2]) # im_v = cv2.cvtColor(im_v, cv2.COLOR_BGR2RGB) cv2.imwrite('merge_image.jpg', im_v) return 'merge_image.jpg' def encode_image(image_path): file_path = concate_image(image_path) with open(file_path, "rb") as image_file: return base64.b64encode(image_file.read()).decode('utf-8') def api_calling(image, prompt, api_key): base64_image = encode_image(image) if len(prompt) == 0: prompt = "Compare the two graphs shown in the image and list the differences?" headers = { "Content-Type": "application/json", "Authorization": f"Bearer {api_key}" } payload = { "model": "gpt-4-vision-preview", "messages": [ { "role": "user", "content": [ { "type": "text", "text": prompt }, { "type": "image_url", "image_url": { "url": f"data:image/jpeg;base64,{base64_image}" } } ] } ], "max_tokens": 500 } response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload) img_preview = response.json() return img_preview["choices"][0]["message"]["content"] def message_and_history(img_input_name, input, history, api_key): history = history or [] s = list(sum(history, ())) s.append(input) inp = ' '.join(s) output = api_calling(img_input_name,inp, api_key) if len(input) == 0: input = "Difference between the graphs." history.append((input, output)) else: history.append((input, output)) return history, history title = "Compare the Images" with gr.Blocks(theme=gr.themes.Glass(primary_hue=gr.themes.colors.slate)) as demo: gr.Markdown(f'