sajjadrahman56 committed on
Commit
a047f73
Β·
verified Β·
1 Parent(s): c26d69b

create app.py file - main code file

Browse files
Files changed (1) hide show
  1. app.py +302 -0
app.py ADDED
@@ -0,0 +1,302 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import openai
3
+ import base64
4
+ from PIL import Image
5
+ import io
6
+ import pymupdf as fitz
7
+
8
# ---------- PDF Text Extraction ----------
def extract_text_from_pdf(pdf_file):
    """Extract the plain text of every page in *pdf_file*.

    Args:
        pdf_file: Path (or file-like object) accepted by ``fitz.open``.

    Returns:
        The concatenated page text, or an ``"Error ..."`` string on failure
        (callers display the return value either way).
    """
    try:
        # Context manager guarantees the document is closed even when
        # get_text() raises mid-iteration (the original leaked the handle
        # because close() was skipped on exception).
        with fitz.open(pdf_file) as pdf_document:
            return "".join(page.get_text() for page in pdf_document)
    except Exception as e:
        return f"Error extracting text from PDF: {str(e)}"
19
+
20
# ---------- PDF Quiz Generation ----------
def generate_mcq_quiz(pdf_content, num_questions, openai_api_key, model_choice):
    """Generate *num_questions* multiple-choice questions from PDF text.

    Args:
        pdf_content: Raw text previously extracted from the uploaded PDF.
        num_questions: Number of MCQs to request from the model.
        openai_api_key: User-supplied OpenAI key.
        model_choice: Chat model name used for generation.

    Returns:
        The model's quiz text, or an ``"Error ..."`` string on failure.
    """
    if not openai_api_key:
        return "Error: No API key provided."

    openai.api_key = openai_api_key
    # Slicing is already a no-op for short strings, so the original
    # ``if len(...) > 8000`` guard was redundant.
    limited_content = pdf_content[:8000]

    prompt = f"""Based on the following document content, generate {num_questions} multiple-choice quiz questions.
For each question:
1. Create a clear question based on key concepts in the document
2. Provide 4 options (A, B, C, D)
3. Indicate the correct answer
4. Briefly explain the correct answer
Document content:
{limited_content}
"""

    try:
        response = openai.ChatCompletion.create(
            model=model_choice,
            messages=[{"role": "user", "content": prompt}]
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"Error generating quiz: {str(e)}"
46
+
47
# ---------- Image Processing ----------
def generate_image_response(input_text, image, openai_api_key, model_choice):
    """Answer *input_text* about a PIL *image* using a vision-capable model.

    Args:
        input_text: The user's question about the image (may be empty).
        image: PIL image object from the Gradio image widget.
        openai_api_key: User-supplied OpenAI key.
        model_choice: Chat model name used for the vision request.

    Returns:
        The model's reply, or an ``"Error ..."`` string on failure.
    """
    if not openai_api_key:
        return "Error: No API key provided."

    openai.api_key = openai_api_key

    try:
        # Encode the image as a base64 PNG data URL. This now sits inside the
        # try block so a bad/unsupported image yields an error string instead
        # of an uncaught exception (the original encoded outside the handler).
        buffered = io.BytesIO()
        image.save(buffered, format="PNG")
        base64_str = base64.b64encode(buffered.getvalue()).decode("utf-8")

        response = openai.ChatCompletion.create(
            model=model_choice,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": input_text},
                        {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_str}"}}
                    ]
                }
            ]
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"Error processing image: {str(e)}"
75
+
76
# ---------- Voice Processing ----------
def process_voice_input(audio_path, openai_api_key, model_choice):
    """Transcribe an audio file with Whisper, then answer the transcript.

    Args:
        audio_path: Filesystem path of the uploaded/recorded audio clip.
        openai_api_key: User-supplied OpenAI key.
        model_choice: Chat model used to answer the transcribed prompt.

    Returns:
        ``(assistant_reply, transcribed_text)``; on failure the reply is an
        ``"Error ..."`` string and the transcript is ``""``.
    """
    # BUG FIX: always return a 2-tuple. The caller unpacks
    # ``response, transcribed_text = process_voice_input(...)``, and the
    # original bare-string return here crashed that unpacking.
    if not openai_api_key:
        return "Error: No API key provided.", ""

    try:
        openai.api_key = openai_api_key
        # ``with`` closes the file even if transcription raises (the original
        # leaked the handle on error).
        with open(audio_path, "rb") as audio_file:
            transcript = openai.Audio.transcribe("whisper-1", audio_file)
        prompt = transcript["text"]

        response = openai.ChatCompletion.create(
            model=model_choice,
            messages=[{"role": "user", "content": prompt}]
        )
        return response.choices[0].message.content, prompt
    except Exception as e:
        return f"Error processing voice: {str(e)}", ""
95
+
96
# ---------- Unified Chatbot Handler ----------
def chatbot(input_text, image, pdf_file, audio_file, openai_api_key, model_choice,
            pdf_content, num_quiz_questions, pdf_quiz_mode, audio_mode, history):
    """Dispatch one user turn to the quiz, audio, image, or text handler.

    Returns a 6-tuple matching the Gradio outputs: (cleared text box,
    cleared image, cleared pdf, cleared audio, updated PDF-text state,
    updated chat history).
    """
    if history is None:
        history = []

    # Refresh the stored PDF text whenever a new file arrives; otherwise
    # keep whatever was extracted on a previous turn.
    updated_pdf_text = extract_text_from_pdf(pdf_file) if pdf_file is not None else pdf_content

    if pdf_quiz_mode:
        if not updated_pdf_text:
            history.append(("πŸ‘€: [PDF Quiz]", "πŸ€–: Please upload a PDF file first."))
        else:
            quiz = generate_mcq_quiz(updated_pdf_text, int(num_quiz_questions),
                                     openai_api_key, model_choice)
            history.append((f"πŸ‘€: [PDF Quiz - {num_quiz_questions} questions]", f"πŸ€–: {quiz}"))
    elif audio_mode:
        if audio_file is None:
            history.append(("πŸ‘€: [Audio]", "πŸ€–: Please upload or record an audio file."))
        else:
            answer, spoken = process_voice_input(audio_file, openai_api_key, model_choice)
            history.append((f"πŸ‘€ (Voice): {spoken}", f"πŸ€–: {answer}"))
    elif image is not None:
        answer = generate_image_response(input_text, image, openai_api_key, model_choice)
        history.append((f"πŸ‘€: {input_text or '[Image]'}", f"πŸ€–: {answer}"))
    elif input_text:
        # Plain text chat when no image is attached.
        try:
            openai.api_key = openai_api_key
            completion = openai.ChatCompletion.create(
                model=model_choice,
                messages=[{"role": "user", "content": input_text}]
            )
            history.append((f"πŸ‘€: {input_text}", f"πŸ€–: {completion.choices[0].message.content}"))
        except Exception as e:
            history.append((f"πŸ‘€: {input_text}", f"πŸ€–: Error: {str(e)}"))

    return "", None, None, None, updated_pdf_text, history
141
+
142
# ---------- Clear Chat ----------
def clear_history():
    """Reset every input widget, the stored PDF text, and the chat log."""
    cleared_inputs = ("", None, None, None)
    return (*cleared_inputs, "", [])
145
+
146
# ---------- Input Type Toggle ----------
def update_input_type(choice):
    """Reconfigure widget visibility and mode flags for the selected input type.

    Returns eight ``gr.update`` objects in the order wired in the UI:
    (input_text, image_input, pdf_input, audio_input, quiz_slider,
    pdf_quiz_mode, audio_mode, model_hint). An unknown choice falls
    through and returns None, matching the original behavior.
    """

    def _layout(text_vis, image_vis, pdf_vis, audio_vis, slider_vis,
                quiz_flag, audio_flag, hint):
        # Shared tuple builder; argument order mirrors the output wiring.
        return (
            gr.update(visible=text_vis),          # input_text
            gr.update(visible=image_vis),         # image_input
            gr.update(visible=pdf_vis),           # pdf_input
            gr.update(visible=audio_vis),         # audio_input
            gr.update(visible=slider_vis),        # quiz_slider
            gr.update(value=quiz_flag),           # pdf_quiz_mode
            gr.update(value=audio_flag),          # audio_mode
            gr.update(value=hint, visible=True),  # model_hint
        )

    if choice == "Image":
        return _layout(True, True, False, False, False, False, False, """
πŸ’‘ **Image Mode Tips:**
- Both **o1** and **o3-mini** support image analysis
- o1 provides more detailed analysis but costs more
- o3-mini is faster and more cost-effective for simple image questions
""")
    if choice == "PDF(QUIZ)":
        return _layout(False, False, True, False, True, True, False, """
πŸ“š **PDF Quiz Mode Tips:**
- Both models can generate quizzes from PDF content
- o1 creates more comprehensive and detailed questions
- o3-mini generates quizzes faster with good quality
- Large PDFs are automatically limited to first 8000 characters
""")
    if choice == "Audio":
        return _layout(False, False, False, True, False, False, True, """
🎀 **Audio Mode Tips:**
- **Important:** Audio transcription uses OpenAI's `whisper-1` model (separate cost)
- **gpt-4 transcribe**: More sophisticated responses but higher cost per token
- **gpt-4-mini-transcribe**: Cost-effective for most audio conversations
- Supports common audio formats (MP3, WAV, M4A, etc.)
- Maximum audio file size: 25MB
""")
203
+
204
# ---------- CSS Styling ----------
# Injected into gr.Blocks(css=custom_css) by create_interface. The id
# selectors (#submit-btn, #clear-history) match the elem_id values set on
# the two buttons; the CSS text itself is runtime data and must not change.
custom_css = """
.gradio-container {
    font-family: 'Arial', sans-serif;
    background-color: #f0f4f8;
}
.gradio-header {
    background: linear-gradient(135deg, #4a00e0 0%, #8e2de2 100%);
    color: white;
    padding: 20px;
    border-radius: 8px;
    text-align: center;
}
#submit-btn {
    background: linear-gradient(135deg, #4a00e0 0%, #8e2de2 100%);
    color: white;
    border-radius: 8px;
}
#clear-history {
    background: linear-gradient(135deg, #e53e3e 0%, #f56565 100%);
    color: white;
    border-radius: 8px;
}
"""
228
+
229
# ---------- UI Interface ----------
def create_interface():
    """Build and return the Gradio Blocks app, wiring widgets to handlers.

    Layout: API-key box, input-type radio, a mode-specific hint, the five
    input widgets (only the relevant ones are visible at any time), the
    model dropdown with submit/clear buttons, and the chat history panel.
    """
    with gr.Blocks(css=custom_css) as demo:
        gr.Markdown("""
        <div class="gradio-header">
            <h1>Multimodal Chatbot (Image + PDF Quiz + Voice)</h1>
            <h3>Ask via image, PDF, or voice</h3>
        </div>
        """)

        with gr.Accordion("Instructions", open=False):
            gr.Markdown("""
            - **Image Chat**: Upload an image and ask about it
            - **PDF Quiz**: Upload a PDF and generate MCQs
            - **Audio Chat**: Upload or record audio to chat
            - Always provide your OpenAI API key
            """)

        # State variables
        # Holds the text extracted from the last uploaded PDF across turns.
        pdf_content = gr.State("")

        with gr.Row():
            openai_api_key = gr.Textbox(label="OpenAI API Key", type="password", placeholder="sk-...")

        with gr.Row():
            input_type = gr.Radio(["Image", "PDF(QUIZ)", "Audio"], label="Input Type", value="Image")

        # Model-specific hints that appear based on input type
        model_hint = gr.Markdown("", visible=False)

        # Input components row - all in one organized row
        with gr.Row():
            input_text = gr.Textbox(label="Question (for images)", visible=True)
            image_input = gr.Image(label="Upload Image", type="pil", visible=True)
            pdf_input = gr.File(label="Upload PDF", visible=False)
            audio_input = gr.Audio(label="Upload/Record Audio", type="filepath", visible=False)
            quiz_slider = gr.Slider(1, 20, value=5, step=1, label="Number of Questions", visible=False)

        # Hidden state components for mode control
        # (toggled by update_input_type, read by chatbot on submit).
        pdf_quiz_mode = gr.Checkbox(visible=False, value=False)
        audio_mode = gr.Checkbox(visible=False, value=False)

        with gr.Row():
            model_choice = gr.Dropdown(["o1", "o3-mini","whisper-1", "gpt-4o-mini-transcribe", "gpt-4o-transcribe",], label="Model", value="o1")
            submit_btn = gr.Button("Submit", elem_id="submit-btn")
            clear_btn = gr.Button("Clear History", elem_id="clear-history")

        chat_history = gr.Chatbot()

        # Event handlers
        # NOTE: the outputs lists must match, element for element, the tuples
        # returned by update_input_type / chatbot / clear_history.
        input_type.change(
            update_input_type,
            inputs=[input_type],
            outputs=[input_text, image_input, pdf_input, audio_input, quiz_slider, pdf_quiz_mode, audio_mode, model_hint]
        )

        submit_btn.click(
            chatbot,
            inputs=[input_text, image_input, pdf_input, audio_input, openai_api_key, model_choice,
                    pdf_content, quiz_slider, pdf_quiz_mode, audio_mode, chat_history],
            outputs=[input_text, image_input, pdf_input, audio_input, pdf_content, chat_history]
        )

        clear_btn.click(
            clear_history,
            outputs=[input_text, image_input, pdf_input, audio_input, pdf_content, chat_history]
        )

    return demo
298
+
299
# ---------- Launch ----------
if __name__ == "__main__":
    # Build the UI and start the local Gradio server.
    create_interface().launch()