# Multimodal Gradio chatbot: text, image, voice, and PDF inputs, plus PDF quiz generation.
import gradio as gr
import openai  # written against the legacy (pre-1.0) OpenAI Python SDK interface
import base64
from PIL import Image
import io
import fitz  # PyMuPDF

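# Extract plain text from every page of an uploaded PDF using PyMuPDF.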
def extract_text_from_pdf(pdf_file):
    try:
        text = ""
        pdf_document = fitz.open(pdf_file)
        for page_num in range(len(pdf_document)):
            page = pdf_document[page_num]
            text += page.get_text()
        pdf_document.close()
        return text
    except Exception as e:
        return f"Error extracting text from PDF: {str(e)}"

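# Ask the selected model to write multiple-choice questions from the extracted PDF text.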
def generate_mcq_quiz(pdf_content, num_questions, openai_api_key, model_choice):
    if not openai_api_key:
        return "Error: No API key provided."

    openai.api_key = openai_api_key

    # Truncate very long documents so the prompt stays a manageable size.
    limited_content = pdf_content[:8000]

    prompt = f"""Based on the following document content, generate {num_questions} multiple-choice quiz questions.

For each question:
1. Write a clear question
2. Give 4 options (A, B, C, D)
3. Indicate the correct answer
4. Briefly explain why the answer is correct

Document:
{limited_content}
"""

    try:
        response = openai.ChatCompletion.create(
            model=model_choice,
            messages=[{"role": "user", "content": prompt}]
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"Error generating quiz: {str(e)}"

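# Encode a PIL image as a base64 PNG string, suitable for embedding in a data URL.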
def get_base64_string_from_image(pil_image):
    buffered = io.BytesIO()
    pil_image.save(buffered, format="PNG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")

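# Transcribe uploaded or recorded audio with the Whisper API ("whisper-1").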
def transcribe_audio(audio, openai_api_key):
    if not openai_api_key:
        return "Error: No API key provided."

    openai.api_key = openai_api_key

    try:
        with open(audio, 'rb') as f:
            audio_bytes = f.read()
        # The transcription endpoint infers the audio format from the file name,
        # so give the in-memory buffer an explicit one.
        file_obj = io.BytesIO(audio_bytes)
        file_obj.name = 'audio.wav'
        transcription = openai.Audio.transcribe(file=file_obj, model="whisper-1")
        return transcription.text
    except Exception as e:
        return f"Error transcribing audio: {str(e)}"

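# Build the user message (plain text, PDF-grounded question, or image) and query the chat model.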
def generate_response(input_text, image, pdf_content, openai_api_key, reasoning_effort, model_choice):
    if not openai_api_key:
        return "Error: No API key provided."

    openai.api_key = openai_api_key

    if pdf_content and input_text:
        # Ground the question in the extracted PDF text.
        input_text = f"Based on the document below, answer the question:\n\n{input_text}\n\nDocument:\n{pdf_content}"
    elif image:
        # Send the image as a data URL using the chat "image_url" content format,
        # so the model receives the image itself rather than a raw base64 string.
        image_b64 = get_base64_string_from_image(image)
        input_text = [
            {"type": "text", "text": input_text or "Describe this image."},
            {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_b64}"}},
        ]

    try:
        response = openai.ChatCompletion.create(
            model=model_choice,
            messages=[{"role": "user", "content": input_text}],
            reasoning_effort=reasoning_effort,  # honored by o-series reasoning models
            max_completion_tokens=2000
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"Error calling OpenAI API: {str(e)}"

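# Main submit callback: route the active input type to the right helper and append the result to the chat history.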
def chatbot(input_text, image, audio, pdf_file, openai_api_key, reasoning_effort, model_choice, pdf_content, num_quiz_questions, pdf_quiz_mode, history):
    if history is None:
        history = []

    # Voice input: transcribe the audio and use the transcript as the question.
    if audio:
        input_text = transcribe_audio(audio, openai_api_key)

    # If a new PDF was uploaded, (re)extract its text; otherwise keep the cached content.
    new_pdf_content = pdf_content
    if pdf_file:
        new_pdf_content = extract_text_from_pdf(pdf_file)

    if pdf_quiz_mode:
        if new_pdf_content:
            quiz = generate_mcq_quiz(new_pdf_content, int(num_quiz_questions), openai_api_key, model_choice)
            history.append((f"📝 Generated {num_quiz_questions} quiz questions", quiz))
        else:
            history.append(("No PDF detected", "Please upload a PDF file first."))
    else:
        response = generate_response(input_text, image, new_pdf_content, openai_api_key, reasoning_effort, model_choice)
        if input_text:
            history.append((input_text, response))
        elif image:
            history.append(("🖼️ [Image Uploaded]", response))
        elif pdf_file:
            history.append(("📄 [PDF Uploaded]", response))
        else:
            history.append(("No input", "Please provide input."))

    # Clear the input widgets, keep the extracted PDF text in state, and return the updated chat.
    return "", None, None, None, new_pdf_content, history

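# Clear all inputs, the cached PDF text, and the chat history.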
def clear_history():
    return "", None, None, None, "", []

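# Extract PDF text when a file is uploaded so it can be cached in the pdf_content state.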
def process_pdf(pdf_file):
    if pdf_file is None:
        return ""
    return extract_text_from_pdf(pdf_file)

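# Show or hide the input widgets to match the selected input type.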
def update_input_type(choice):
    # Returned updates map to: input_text, image_input, audio_input, pdf_input,
    # quiz_questions_slider, quiz_mode (in that order).
    if choice == "Text":
        return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(value=False)
    elif choice == "Image":
        return gr.update(visible=True), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(value=False)
    elif choice == "Voice":
        return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(value=False)
    elif choice == "PDF":
        return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(value=False)
    elif choice == "PDF(QUIZ)":
        return gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=True), gr.update(value=True)

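# Assemble the Gradio Blocks UI and wire up the event handlers.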
def create_interface():
    with gr.Blocks() as demo:
        gr.Markdown("## 🧠 Multimodal Chatbot – Text | Image | Voice | PDF | Quiz")

        # Extracted PDF text is cached across turns in session state.
        pdf_content = gr.State("")

        openai_api_key = gr.Textbox(label="🔑 OpenAI API Key", type="password", placeholder="sk-...")

        input_type = gr.Radio(
            ["Text", "Image", "Voice", "PDF", "PDF(QUIZ)"],
            label="Choose Input Type",
            value="Text"
        )

        input_text = gr.Textbox(label="Enter your question or text", lines=2, visible=True)
        image_input = gr.Image(label="Upload Image", type="pil", visible=False)
        audio_input = gr.Audio(label="Upload/Record Audio", type="filepath", visible=False)
        pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"], visible=False)
        quiz_questions_slider = gr.Slider(1, 20, value=5, step=1, label="Number of Quiz Questions", visible=False)
        quiz_mode = gr.Checkbox(label="Quiz Mode", visible=False, value=False)

        with gr.Row():
            reasoning_effort = gr.Dropdown(["low", "medium", "high"], value="medium", label="Reasoning Effort")
            model_choice = gr.Dropdown(["o1", "o3-mini"], value="o1", label="Model")

        submit_btn = gr.Button("Submit")
        clear_btn = gr.Button("Clear Chat")

        chat_history = gr.Chatbot(label="Chat History")

        # Toggle which input widgets are visible when the input type changes.
        input_type.change(
            fn=update_input_type,
            inputs=[input_type],
            outputs=[input_text, image_input, audio_input, pdf_input, quiz_questions_slider, quiz_mode]
        )

        # Extract and cache the PDF text as soon as a file is uploaded.
        pdf_input.change(fn=process_pdf, inputs=[pdf_input], outputs=[pdf_content])

        submit_btn.click(
            fn=chatbot,
            inputs=[input_text, image_input, audio_input, pdf_input, openai_api_key, reasoning_effort, model_choice, pdf_content, quiz_questions_slider, quiz_mode, chat_history],
            outputs=[input_text, image_input, audio_input, pdf_input, pdf_content, chat_history]
        )

        clear_btn.click(fn=clear_history, inputs=[], outputs=[input_text, image_input, audio_input, pdf_input, pdf_content, chat_history])

    return demo

if __name__ == "__main__":
    demo = create_interface()
    demo.launch()