File size: 2,926 Bytes
5ed0494
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import os
import time
from PIL import Image
import streamlit as st
import google.generativeai as genai

# Load environment variables (python-dotenv; presumably from a local .env
# file) before the API key is read below.
from dotenv import load_dotenv
load_dotenv()

# Configure the Google AI Python SDK with the key stored in GEMINI_API_KEY.
# os.getenv returns None when the variable is unset, so None is passed through.
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))

def upload_to_gemini(path, mime_type=None):
    """Send the file at ``path`` to Gemini and return the uploaded-file handle.

    ``mime_type`` is forwarded unchanged to ``genai.upload_file``.
    """
    return genai.upload_file(path, mime_type=mime_type)

def wait_for_files_active(files):
    """Block until every file in ``files`` has left the PROCESSING state.

    Each file is re-fetched by name and polled every 10 seconds, printing a
    dot per poll. Raises ``Exception`` if a file ends up in any state other
    than ACTIVE.
    """
    for uploaded in files:
        name = uploaded.name
        while True:
            current = genai.get_file(name)
            if current.state.name != "PROCESSING":
                break
            # Still processing: show progress, then poll again after a pause.
            print(".", end="", flush=True)
            time.sleep(10)
        if current.state.name == "ACTIVE":
            continue
        raise Exception(f"File {current.name} failed to process")

def get_gemini_response(input, images):
    """Ask the Gemini vision model about each image and collect the text replies.

    Args:
        input: User prompt text. When empty, each image is sent on its own.
            The name shadows the builtin but is kept so existing callers
            (positional or keyword) continue to work.
        images: Images to send, one request per image.

    Returns:
        A list of the non-empty text parts from all responses, in image order.
        An empty list is returned when there are no images.
    """
    # Nothing to ask about: skip creating the model entirely.
    if not images:
        return []

    context = """Generates a response based on the images and input prompt."""
    # Build the prompt once. Appending the context inside the loop made the
    # prompt grow by one copy of the context for every additional image.
    # A space keeps the user's text from running into the context sentence.
    prompt = f"{input} {context}" if input else ""

    model = genai.GenerativeModel('gemini-pro-vision')
    responses = []
    for image in images:
        request = [prompt, image] if prompt else image
        response = model.generate_content(request)

        # A response may consist of several parts; keep only those with text.
        responses.extend(part.text for part in response.parts if part.text)
    return responses

def visoto():
    """Main function to run the Streamlit app.

    Renders the page title and header, a prompt box, an optional camera
    capture, and a multi-file image uploader. When the submit button is
    pressed, every collected image is sent to ``get_gemini_response`` and the
    returned texts are written to the page.
    """
    # These must be calls: assigning to st.title / st.header replaces the
    # Streamlit functions with plain strings and renders nothing.
    st.title("Gemini Image Demo")
    st.header("Image Chat Assistant")

    prompt = st.text_input("Input Prompt: ", key="input")

    # State variable to control camera input visibility
    if 'camera_open' not in st.session_state:
        st.session_state.camera_open = False

    if st.button("Open Camera"):
        st.session_state.camera_open = True

    if st.button("Close Camera"):
        st.session_state.camera_open = False

    camera_image = None
    if st.session_state.camera_open:
        camera_image = st.camera_input("Capture an image")

    uploaded_files = st.file_uploader(
        "Choose images...",
        type=["jpg", "jpeg", "png"],
        accept_multiple_files=True,
    )
    images = []

    if camera_image is not None:
        captured = Image.open(camera_image)
        images.append(captured)
        st.image(captured, caption="Captured Image.", use_column_width=True)

    # `or []` covers both None and an empty selection.
    for uploaded_file in uploaded_files or []:
        image = Image.open(uploaded_file)
        images.append(image)
        st.image(image, caption="Uploaded Image.", use_column_width=True)

    submit = st.button("Tell me about the images")
    if not submit:
        return

    # Tell the user why nothing happens instead of silently ignoring the click.
    if not images:
        st.warning("Please capture or upload at least one image first.")
        return

    responses = get_gemini_response(prompt, images)
    st.subheader("The Responses are")
    for response in responses:
        st.write(response)

# Script entry point: build the page only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    visoto()