File size: 6,160 Bytes
34e9ed0
665b349
 
34e9ed0
3f2a11f
21e1b76
7316c12
 
 
 
21e1b76
 
3f2a11f
03e1ccb
3f2a11f
 
5993d47
3f2a11f
 
 
 
 
 
 
 
 
5993d47
3f2a11f
24099da
 
 
 
 
 
e4fb0ba
d0a42ea
2cc5fb0
 
ceefbac
 
fe4a7be
155bff4
51ee4e3
2cc5fb0
bac89d8
2cc5fb0
 
 
138eab4
 
 
2cc5fb0
57c4e15
bac89d8
451ff98
bcf23f8
8291ca9
 
 
 
2021fd6
8291ca9
 
 
95d6ce9
bcf23f8
 
8291ca9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bcf23f8
 
8291ca9
 
 
 
 
 
 
95d6ce9
8291ca9
 
 
 
d0a42ea
47bafef
2597295
fe53790
fd55e2f
e791171
c6d66a6
a157d93
c6d66a6
4d45356
3753415
5208bc8
1670d65
637173d
0ecf06d
a157d93
c6d66a6
5557f0c
47bafef
5557f0c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9d2705e
47bafef
a11d8b4
9d2705e
 
 
47bafef
9d2705e
 
 
 
ecc8cd8
77f28df
9d2705e
7e52d4c
0ce68fa
8291ca9
ceefbac
38511f3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
import fitz
import io
import base64
from PIL import Image
import gradio as gr
import cv2
import requests
import tempfile
import os
import openai

# Passed as the placeholder of the "User" textbox in the UI below.
prompt = "Type and press Enter"

def pdf_to_img(pdf_path):
    """Extract every image embedded in a PDF and save each one to disk.

    Images are written to the current working directory as
    ``image<N>.<ext>``, numbered from 1 in the order they are encountered
    (page by page). ``<ext>`` is the extension reported by
    ``extract_image`` for that image.

    Args:
        pdf_path: Path of the PDF file to read. A falsy value (for example
            ``None`` when no file was chosen) yields an empty list.

    Returns:
        list[str]: Names of the image files that were written, in
        extraction order.
    """
    if not pdf_path:
        return []

    img_list = []
    # The context manager closes the document even if extraction fails.
    with fitz.open(pdf_path) as pdf_document:
        for page in pdf_document:
            for image in page.get_images():
                # The first field of each entry is the reference that
                # extract_image() expects.
                base_img = pdf_document.extract_image(image[0])
                file_name = f"image{len(img_list) + 1}.{base_img['ext']}"
                # Saving by path lets Pillow open *and close* the output
                # file; the previous bare open() handle was never closed.
                with Image.open(io.BytesIO(base_img["image"])) as img:
                    img.save(file_name)
                img_list.append(file_name)
    return img_list

# def show_images(image_name):
#     return image_name

def concate_image(img_input):
    """Resolve a comma-separated list of image names to one image file.

    Each name has surrounding whitespace and single quotes removed; spaces
    inside a name are preserved so paths containing spaces keep working.
    Empty entries (for example from a trailing comma) are ignored.

    Args:
        img_input: A single image path, or several separated by commas.
            Only the first two names are used. ``None`` is treated as an
            empty string.

    Returns:
        str: ``""`` when no name was given; the name itself when exactly
        one was given; otherwise ``'merge_image.jpg'``, a file written to
        the current directory containing the first image stacked above
        the second (the second is resized to the first image's size).

    Raises:
        FileNotFoundError: If either of the two images cannot be read.
    """
    names = []
    for part in (img_input or "").split(','):
        cleaned = part.strip().strip("'").strip()
        if cleaned:
            names.append(cleaned)

    if not names:
        return ""
    if len(names) == 1:
        return names[0]

    images = []
    for name in names[:2]:
        img = cv2.imread(name)
        # cv2.imread signals failure by returning None instead of raising.
        if img is None:
            raise FileNotFoundError(f"Could not read image file: {name!r}")
        images.append(img)
    img1, img2 = images

    # shape is (rows, cols, ...) i.e. (height, width); cv2.resize takes
    # its target size as (width, height).
    height, width = img1.shape[:2]
    img2 = cv2.resize(img2, (width, height))
    im_v = cv2.vconcat([img1, img2])
    cv2.imwrite('merge_image.jpg', im_v)
    return 'merge_image.jpg'

def encode_image(image_path):
    """Return the base64 text of the image that ``image_path`` resolves to.

    ``image_path`` is first handed to ``concate_image``; the file it
    returns is read in binary mode and encoded.

    Args:
        image_path: Image name string accepted by ``concate_image``.

    Returns:
        str: Base64 encoding of the file's bytes, decoded as UTF-8 text.
    """
    resolved_path = concate_image(image_path)
    with open(resolved_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')

def api_calling(image, prompt, api_key, *, model="gpt-4-vision-preview",
                max_tokens=500, timeout=60):
    """Send an image and a text prompt to the OpenAI chat completions API.

    Args:
        image: Image name string accepted by ``encode_image``.
        prompt: Text sent alongside the image. When empty, a default
            request to compare the two graphs is used.
        api_key: OpenAI API key, sent as a bearer token.
        model: Name of the model to query.
        max_tokens: Value of the request's ``max_tokens`` field.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        str: The message content of the first choice in the response.

    Raises:
        RuntimeError: If the response is not JSON or contains no choices
            (for example an invalid key or an unavailable model); the
            message includes the HTTP status and any error text the API
            returned.
        requests.RequestException: On connection failure or timeout.
    """
    base64_image = encode_image(image)
    if not prompt:
        prompt = "Compare the two graphs shown in the image and list the differences?"

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    payload = {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": prompt
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}"
                        }
                    }
                ]
            }
        ],
        "max_tokens": max_tokens
    }
    # Without a timeout, requests waits forever on a stalled connection.
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=timeout,
    )
    try:
        img_preview = response.json()
    except ValueError as err:
        raise RuntimeError(
            f"OpenAI API returned a non-JSON response (HTTP {response.status_code})"
        ) from err

    choices = img_preview.get("choices") if isinstance(img_preview, dict) else None
    if not choices:
        # Surface the API's own explanation instead of a bare KeyError.
        error = img_preview.get("error") if isinstance(img_preview, dict) else None
        if isinstance(error, dict):
            detail = error.get("message", error)
        else:
            detail = error or img_preview
        raise RuntimeError(
            f"OpenAI API request failed (HTTP {response.status_code}): {detail}"
        )
    return choices[0]["message"]["content"]


def message_and_history(img_input_name, input, history, api_key):
    """Run one chat turn about the selected image and record it.

    All earlier questions and answers are joined with the new message
    (separated by two spaces) and sent as the prompt to ``api_calling``.

    Args:
        img_input_name: Image name string forwarded to ``api_calling``.
        input: The user's new message; may be empty.
        history: Earlier ``(question, answer)`` pairs, or ``None`` on the
            first turn. The list is extended in place.
        api_key: OpenAI API key forwarded to ``api_calling``.

    Returns:
        tuple: ``(history, history)`` -- the updated history twice, one
        copy for each of the two outputs the caller binds.
    """
    history = history or []
    # Flatten pair by pair: unlike sum(history, ()), this is linear and
    # accepts pairs stored as lists as well as tuples.
    turns = [text for pair in history for text in pair]
    turns.append(input)
    output = api_calling(img_input_name, '  '.join(turns), api_key)
    # An empty message is recorded under a readable stand-in question.
    history.append((input or "Difference between the graphs.", output))
    return history, history
    
title = "Compare the Images"


# UI layout and event wiring. Three panels sit side by side: PDF upload and
# gallery, image selection and preview, and the chat about the selection.
with gr.Blocks(theme=gr.themes.Glass(primary_hue=gr.themes.colors.slate)) as demo:
    gr.Markdown(f'<h1 style="text-align: center;">{title}</h1>')
    with gr.Row():
        # Panel 1: upload a PDF and browse the images extracted from it.
        with gr.Row():
            with gr.Column():
                file_input = gr.File(type="filepath", label="Upload .pdf file")
                upload_button = gr.Button(value="Show Images")
                img_gallery = gr.Gallery(label="Generated images", show_label=True, elem_id="gallery", object_fit="contain", height="auto",allow_preview=True)

        # Panel 2: pick up to two gallery images and preview the result.
        with gr.Row():
            with gr.Column():
                image_name = gr.Textbox(label="Name of the Images to be Compared")
                # Per-session list of the (at most two) most recently
                # selected image names. Held in gr.State so that concurrent
                # users do not overwrite each other's selection, which a
                # module-level list would allow.
                selected_images = gr.State([])

                def get_select_index(selected, evt: gr.SelectData):
                    """Record a gallery click and return the names to compare.

                    Args:
                        selected: Names selected so far in this session.
                        evt: Selection event; the clicked item's file path
                            is read from ``evt.value['image']['path']``.

                    Returns:
                        tuple: The last two selected file names joined by a
                        comma (or the single name), and the updated list.
                    """
                    # basename works for any directory depth or separator,
                    # unlike indexing a fixed position of split('/').
                    clicked = os.path.basename(evt.value['image']['path'])
                    updated = (list(selected or []) + [clicked])[-2:]
                    return ','.join(updated), updated

                img_gallery.select(get_select_index, selected_images, [image_name, selected_images])
                show_button = gr.Button(value="Show Images")
                output_img = gr.Image(type="filepath",label="Selected Images")

        # Panel 3: API key entry and the chat about the selected image(s).
        with gr.Row():
            with gr.Column():
                api_input = gr.Textbox(label= "Enter Api-key")
                show_concat = gr.Button(value="Compare Images")
                chatbot = gr.Chatbot()
                message = gr.Textbox(label="User", placeholder=prompt)
                state = gr.State()

    upload_button.click(pdf_to_img, inputs=file_input, outputs=[img_gallery])
    show_button.click(concate_image, inputs=image_name, outputs=output_img)
    show_concat.click(message_and_history, inputs=[output_img, message, state, api_input], outputs=[chatbot, state])
    message.submit(message_and_history, inputs=[output_img, message, state, api_input], outputs=[chatbot, state])
    # Second submit handler: clear the textbox once the message is sent.
    message.submit(lambda: None, None, message, queue=False)

demo.launch()