Rahatara committed on
Commit
c2293ca
·
verified ·
1 Parent(s): 9c01f1a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -0
app.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import gradio as gr
4
+ import google.generativeai as genai
5
+
6
# Authenticate the Gemini SDK once, at import time. The key is read by
# indexing os.environ, so a missing GEMINI_API_KEY raises KeyError right here
# instead of surfacing later on the first API call.
genai.configure(
    api_key=os.environ["GEMINI_API_KEY"],
)
8
+
9
+ # Upload files to Gemini
10
def upload_to_gemini(file_path, mime_type):
    """Upload one local file through the Gemini SDK and describe the result.

    Args:
        file_path: Path handed to ``genai.upload_file``.
        mime_type: MIME type sent with the upload and echoed back in the
            returned reference.

    Returns:
        A dict of the form
        ``{"fileData": {"fileUri": <uploaded URI>, "mimeType": mime_type}}``.

    NOTE(review): the keys are camelCase; confirm that every consumer of this
    dict expects that spelling.
    """
    uploaded = genai.upload_file(file_path, mime_type=mime_type)
    display_name, uri = uploaded.display_name, uploaded.uri
    print(f"Uploaded file '{display_name}' with URI: {uri}")

    reference = {"fileUri": uri, "mimeType": mime_type}
    return {"fileData": reference}
15
+
16
+ # Wait for file processing
17
def wait_for_files_active(files):
    """Block until every uploaded file has left the PROCESSING state.

    Args:
        files: Iterable of upload references shaped like
            ``{"fileData": {"fileUri": ...}}``.

    Raises:
        RuntimeError: If a file stops processing in any state other than
            ``ACTIVE``.
    """
    print("Waiting for file processing...")
    for reference in files:
        uri = reference["fileData"]["fileUri"]
        # genai.get_file() looks a file up by its resource name
        # ("files/<id>"), not by its download URI. The id is the last path
        # segment of the URI, so rebuild the name from it. A value that is
        # already "files/<id>" maps to itself.
        name = "files/" + uri.rstrip("/").rsplit("/", 1)[-1]

        status = genai.get_file(name)
        while status.state.name == "PROCESSING":
            print(".", end="", flush=True)
            time.sleep(5)
            status = genai.get_file(name)
        if status.state.name != "ACTIVE":
            raise RuntimeError(f"File {uri} failed to process.")
    print("\nAll files are ready.")
29
+
30
+ # Process user inputs and interact with Gemini API
31
def _gemini_file_name(reference):
    """Return the ``files/<id>`` resource name for an upload reference dict."""
    uri = reference["fileData"]["fileUri"]
    return "files/" + uri.rstrip("/").rsplit("/", 1)[-1]


def process_inputs(image_path, audio_path, pdf_file, video_path, user_prompt):
    """Upload the supplied files, then ask Gemini about them.

    Args:
        image_path: Path of an image file, or a falsy value to skip it.
        audio_path: Path of an audio file, or a falsy value to skip it.
        pdf_file: Uploaded document, given either as a path string or as an
            object whose ``.name`` is the path; falsy to skip it.
        video_path: Path of a video file, or a falsy value to skip it.
        user_prompt: Text prompt; falsy to send the files without a prompt.

    Returns:
        The text of the model's reply, or a short instruction message when
        neither a file nor a prompt was supplied.
    """
    import mimetypes

    pdf_path = getattr(pdf_file, "name", pdf_file) if pdf_file else None

    # Each entry pairs a path with the MIME type to fall back on when the
    # file extension is not recognised.
    candidates = (
        (image_path, "image/png"),
        (audio_path, "audio/ogg"),
        (pdf_path, "application/pdf"),
        (video_path, "video/quicktime"),
    )

    references = []
    for path, fallback_mime in candidates:
        if not path:
            continue
        # Use the real type of the file (e.g. image/jpeg, video/mp4) instead
        # of labelling every upload with one fixed type per slot.
        guessed_mime, _ = mimetypes.guess_type(str(path))
        references.append(
            upload_to_gemini(path, mime_type=guessed_mime or fallback_mime)
        )

    contents = []
    if references:
        wait_for_files_active(references)
        # The SDK's documented way to reference an uploaded file in a prompt
        # is the File object itself, so fetch one for each upload rather than
        # passing the camelCase reference dict through to the model.
        contents.extend(
            genai.get_file(_gemini_file_name(ref)) for ref in references
        )

    if user_prompt:
        contents.append({"text": user_prompt})

    # Nothing to send: answer locally instead of issuing an empty request.
    if not contents:
        return "Please upload at least one file or enter a prompt."

    model = genai.GenerativeModel(model_name="gemini-2.0-flash-exp")
    response = model.generate_content(contents=contents)
    return response.text
58
+
59
+ # Gradio Interface
60
with gr.Blocks() as demo:
    gr.Markdown("## Multimodal Chat Interface with Gemini API")

    with gr.Row():
        # type="filepath" makes these components pass a path on disk to the
        # callback. Their default ("numpy") passes decoded array data, which
        # process_inputs cannot upload as a file.
        image_input = gr.Image(label="Upload an Image", type="filepath")
        audio_input = gr.Audio(label="Upload an Audio File", type="filepath")
    with gr.Row():
        pdf_input = gr.File(label="Upload a PDF")
        video_input = gr.Video(label="Upload a Video File")

    user_prompt = gr.Textbox(
        label="Enter Your Prompt",
        placeholder="Ask something about the files...",
    )

    submit_button = gr.Button("Submit")
    output_box = gr.Textbox(label="Gemini AI Response")

    # The order of `inputs` must match the positional parameters of
    # process_inputs.
    submit_button.click(
        fn=process_inputs,
        inputs=[image_input, audio_input, pdf_input, video_input, user_prompt],
        outputs=output_box,
    )

# Launch the Gradio app
demo.launch()