File size: 5,539 Bytes
0d1a2be
 
 
 
 
7e4d0f8
858825b
 
7e4d0f8
 
0d1a2be
35fc170
 
 
 
858825b
 
8268b9a
858825b
 
 
 
 
 
 
 
 
7e4d0f8
35fc170
7e4d0f8
74b46ed
2ba9756
63a1570
54199b5
35fc170
 
7e4d0f8
35fc170
 
 
 
 
 
 
 
 
7e4d0f8
35fc170
 
 
 
51b847b
 
35fc170
 
 
 
 
5f11108
19e9398
aa6386f
35fc170
 
 
 
 
7e4d0f8
 
 
 
 
 
b7b6034
1e94329
b7b6034
 
7e4d0f8
35fc170
c26bdf7
7e4d0f8
aff1d6f
490ada8
858825b
4de821f
7e4d0f8
490ada8
858825b
39a885a
490ada8
7e4d0f8
 
 
 
 
128a4ce
7e4d0f8
5c80744
238923d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import gradio as gr
import base64
import requests
import tempfile
import os
import openai
import cv2
import numpy

# Placeholder text shown in the chat message textbox (see the gr.Textbox below).
prompt = "Type and press Enter"

def encode_image(image_path):
    """Return the contents of the file at *image_path* as a base64 string."""
    with open(image_path, "rb") as fh:
        raw = fh.read()
    return base64.b64encode(raw).decode("utf-8")

def mark_contours(image_path):
    """Detect contours in the image and return the path of an annotated copy.

    The image is shrunk slightly, binarized with Otsu's method, and every
    detected contour is drawn in green on a copy of the (resized) original.

    Args:
        image_path: Path to an image file readable by OpenCV.

    Returns:
        Path to a newly created JPEG file with the contours drawn on it.

    Raises:
        ValueError: If the image cannot be read from *image_path*.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None instead of raising on a bad path/format;
        # fail loudly here rather than crashing inside cv2.resize.
        raise ValueError(f"Could not read image: {image_path}")
    # Slight shrink keeps the annotated preview a bit smaller than the upload.
    image = cv2.resize(image, None, fx=0.9, fy=0.9)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # With THRESH_OTSU the threshold value (0 here) is ignored and chosen
    # automatically from the image histogram.
    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    contours, _ = cv2.findContours(binary, mode=cv2.RETR_TREE, method=cv2.CHAIN_APPROX_NONE)
    annotated = cv2.drawContours(image.copy(), contours, -1, (0, 255, 0), thickness=2, lineType=cv2.LINE_AA)
    # Write to a unique temp file instead of a fixed name in the CWD so
    # concurrent requests cannot clobber each other's output.
    fd, out_path = tempfile.mkstemp(suffix=".jpg")
    os.close(fd)
    cv2.imwrite(out_path, annotated)
    return out_path

def api_calling(image, prompt, api_key):
    """Send *image* and *prompt* to the OpenAI vision chat API, return reply text.

    Args:
        image: Path to the image file to analyse.
        prompt: User question; when empty, a default tile-counting prompt is used.
        api_key: OpenAI API key placed in the Bearer authorization header.

    Returns:
        The assistant's reply text, or a short error description if the request
        fails or the API returns an error payload (so the Gradio UI shows a
        message instead of raising).
    """
    base64_image = encode_image(image)
    if not prompt:
        prompt = "I want you to act as an experienced mathematician with good rational-counting skills. I will share an image of floor with you and your objective is to provide the number of tiles that needs replacement and number of tiles that needs cleaning. Let the total number of tiles in the image be x  and number of dirty tiles in the image be y then the number of tiles to be replaced will be equal to y and total number of tiles needs to be cleaned can be calculated by 'x-y'. I have to provide this data to a tile setter and as I will be charged on per tile basis so the tile setter wants to know the exact number of tiles that needs replacement and cleaning separately so that they could provide me a price estimate. The dirty tiles will be replaced and remaining all the tiles will need cleaning. Reply with the number of tiles only with as much accuracy as possible. I want you to keep the reply neat, limiting the reply to less than 40 words in English."

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    payload = {
        "model": "gpt-4-vision-preview",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": prompt
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            # "high" detail asks the model to inspect the image
                            # at full resolution (more tokens, better counting).
                            "url": f"data:image/jpeg;base64,{base64_image}",
                            "detail": "high"
                        }
                    }
                ]
            }
        ],
        "max_tokens": 500,
        "temperature": 0.5,
        "n": 1
    }
    try:
        response = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=120,  # avoid hanging the UI forever on a stalled request
        )
        response.raise_for_status()
        data = response.json()
        return data["choices"][0]["message"]["content"]
    except requests.RequestException as exc:
        return f"Request failed: {exc}"
    except (KeyError, IndexError, TypeError, ValueError):
        # Unexpected payload shape (e.g. an API error body without "choices").
        return "Unexpected response from the OpenAI API."

def message_and_history(img, input, history, api_key):
    """Gradio callback: query the model with the full accumulated conversation.

    Args:
        img: Filepath of the uploaded image.
        input: The user's new message; may be empty (a default prompt is then
            chosen downstream in api_calling).
        history: List of (user, bot) tuples from previous turns, or None on the
            first call.
        api_key: OpenAI API key, forwarded to api_calling.

    Returns:
        The updated history twice: once for the Chatbot display and once for
        the gr.State component.
    """
    history = history or []
    # Flatten prior (user, bot) pairs and append the new message so the model
    # sees the whole conversation as a single prompt. A comprehension avoids
    # the quadratic sum(history, ()) flatten.
    context = [turn for pair in history for turn in pair]
    context.append(input)
    combined = '  '.join(context)
    output = api_calling(img, combined, api_key)
    # Record an empty user slot when the default prompt was used.
    history.append((input or '', output))
    return history, history

# UI layout: left column takes the image upload and API key, the middle column
# shows the contour-annotated preview, and the right column hosts the chat.
demo = gr.Blocks(theme=gr.themes.Glass(primary_hue="slate"))
with demo:
    gr.Markdown("""<h1>Visual Explorer - Ask and Learn about Images</h1> """)
    with gr.Row():
        with gr.Column():
            image_box = gr.Image(type="filepath", label="Upload Image")
            key_box = gr.Textbox(label="Enter Api-key")
            start_btn = gr.Button(value="Upload & Start Chat", interactive=True, variant="primary")
        with gr.Column():
            contour_view = gr.Image(type="filepath", label="Dirty Portion on Image")
        with gr.Column():
            chat_window = gr.Chatbot(label="Chat with Image")
            user_msg = gr.Textbox(label="User", placeholder=prompt)
            chat_state = gr.State()

    # The button both kicks off the chat and renders the contour overlay.
    start_btn.click(message_and_history, inputs=[image_box, user_msg, chat_state, key_box], outputs=[chat_window, chat_state])
    start_btn.click(mark_contours, inputs=image_box, outputs=contour_view)
    # Pressing Enter sends the message, then a second handler clears the box.
    user_msg.submit(message_and_history, inputs=[image_box, user_msg, chat_state, key_box], outputs=[chat_window, chat_state])
    user_msg.submit(lambda: None, None, user_msg, queue=False)
demo.launch()