Rahatara committed on
Commit
e9b57cc
·
verified ·
1 Parent(s): 900b3b2

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -0
app.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import time
import gradio as gr
import google.generativeai as genai

# Configure Gemini API
# NOTE(review): os.environ[...] raises KeyError at import time if
# GEMINI_API_KEY is unset — intentional fail-fast before the UI starts.
genai.configure(api_key=os.environ["GEMINI_API_KEY"])
8
+
9
+ # Upload files to Gemini
10
def upload_to_gemini(path, mime_type=None):
    """Push a local file to the Gemini Files API and return its handle.

    Args:
        path: Filesystem path of the file to upload.
        mime_type: Optional MIME type hint forwarded to the API.

    Returns:
        The uploaded-file object produced by ``genai.upload_file``.
    """
    uploaded = genai.upload_file(path, mime_type=mime_type)
    print(f"Uploaded file '{uploaded.display_name}' as: {uploaded.uri}")
    return uploaded
15
+
16
+ # Wait for files to process
17
def wait_for_files_active(files):
    """Block until every uploaded file finishes server-side processing.

    Polls the Gemini Files API every 5 seconds while a file is still in
    the PROCESSING state, printing a dot per poll as progress feedback.

    Args:
        files: Iterable of file objects as returned by ``genai.upload_file``.

    Raises:
        RuntimeError: If any file ends in a state other than ACTIVE
            (e.g. FAILED), since such a file cannot be used in a prompt.
    """
    print("Waiting for file processing...")
    for name in (file.name for file in files):
        file = genai.get_file(name)
        while file.state.name == "PROCESSING":
            print(".", end="", flush=True)
            # Polling interval — the Files API offers no push notification.
            time.sleep(5)
            file = genai.get_file(name)
        if file.state.name != "ACTIVE":
            # RuntimeError (an Exception subclass) keeps any existing
            # `except Exception` callers working while avoiding a bare
            # Exception, which cannot be caught selectively.
            raise RuntimeError(f"File {file.name} failed to process")
    print("...all files ready")
29
+
30
+ # Handle inputs and send a message to Gemini
31
def process_inputs(image_path, audio_path, pdf_path, video_path, user_prompt):
    """Upload any provided files to Gemini and answer the user's prompt.

    Args:
        image_path: Path to a PNG image, or falsy if not provided.
        audio_path: Path to an OGG audio file, or falsy if not provided.
        pdf_path: Path to a PDF document, or falsy if not provided.
        video_path: Path to a MOV video, or falsy if not provided.
        user_prompt: The question to ask the model.

    Returns:
        The model's text response, or a hint string when no prompt was given.
    """
    if not user_prompt:
        # Guard: sending an empty message would fail server-side with an
        # opaque API error; give the user a clear hint instead.
        return "Please enter a prompt."

    # Map each optional input to the MIME type matching the UI's file hints.
    candidates = [
        (image_path, "image/png"),
        (audio_path, "audio/ogg"),
        (pdf_path, "application/pdf"),
        (video_path, "video/quicktime"),
    ]
    files = [
        upload_to_gemini(path, mime_type=mime)
        for path, mime in candidates
        if path
    ]

    # Block until Gemini has finished processing the uploads.
    if files:
        wait_for_files_active(files)

    # Seed the chat history with the uploaded files only. The prompt itself
    # is delivered via send_message below — previously it was ALSO appended
    # to the history, so the model received the question twice.
    history = [{"role": "user", "parts": [file]} for file in files]

    # Start chat and send the user's message.
    model = genai.GenerativeModel(model_name="gemini-2.0-flash-exp")
    chat_session = model.start_chat(history=history)
    response = chat_session.send_message(user_prompt)
    return response.text
62
+
63
+ # Gradio Interface
64
# --- Gradio UI ---------------------------------------------------------------
with gr.Blocks() as demo:
    # Page heading.
    gr.Markdown("## Multimodal Gemini AI Chat Interface")

    # First row: image / audio uploads.
    with gr.Row():
        image_input = gr.File(label="Upload Image File (PNG)")
        audio_input = gr.File(label="Upload Audio File (OGG)")
    # Second row: document / video uploads.
    with gr.Row():
        pdf_input = gr.File(label="Upload PDF File")
        video_input = gr.File(label="Upload Video File (MOV)")

    # Free-text prompt plus submit / response widgets.
    user_prompt = gr.Textbox(
        label="Enter Your Prompt",
        placeholder="Type your question here...",
    )
    submit_button = gr.Button("Submit")
    output = gr.Textbox(label="Gemini Response")

    # Wire the button to the Gemini handler.
    all_inputs = [image_input, audio_input, pdf_input, video_input, user_prompt]
    submit_button.click(fn=process_inputs, inputs=all_inputs, outputs=output)

# Start the local web server for the app.
demo.launch()