Spaces:

sajjadrahman56
/

multi_model_chat_bot

Sleeping

App Files Files Community

sajjadrahman56 committed on May 31, 2025

Commit

a047f73

verified ·

1 Parent(s): c26d69b

create app.py file - main code file

Browse files

Files changed (1) hide show

app.py +302 -0

app.py ADDED Viewed

	@@ -0,0 +1,302 @@

+import gradio as gr
+import openai
+import base64
+from PIL import Image
+import io
+import pymupdf as fitz
+# ---------- PDF Text Extraction ----------
def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of a PDF as one string.

    Args:
        pdf_file: Filesystem path of the PDF, or an upload wrapper that
            exposes that path through a ``name`` attribute.

    Returns:
        The page texts concatenated in document order. On any failure the
        exception is not raised; a string starting with
        ``"Error extracting text from PDF:"`` is returned instead.
    """
    import os

    # Plain paths go straight through; anything else is treated as a
    # file-like upload object whose ``name`` holds the on-disk location.
    if isinstance(pdf_file, (str, os.PathLike)):
        pdf_path = pdf_file
    else:
        pdf_path = getattr(pdf_file, "name", pdf_file)
    try:
        # ``with`` closes the document even if a page fails part-way through.
        with fitz.open(pdf_path) as pdf_document:
            return "".join(page.get_text() for page in pdf_document)
    except Exception as e:  # UI boundary: report the problem as text
        return f"Error extracting text from PDF: {str(e)}"
+# ---------- PDF Quiz Generation ----------
def generate_mcq_quiz(pdf_content, num_questions, openai_api_key, model_choice, max_chars=8000):
    """Ask a chat model to write a multiple-choice quiz about a document.

    Args:
        pdf_content: Text of the document the quiz is based on.
        num_questions: Number of questions requested in the prompt.
        openai_api_key: OpenAI API key used for the request.
        model_choice: Name of the chat model to call.
        max_chars: Only this many leading characters of ``pdf_content`` are
            included in the prompt.

    Returns:
        The model's reply text, or a string starting with ``"Error"`` when
        the key or the content is missing or the request fails.
    """
    if not openai_api_key:
        return "Error: No API key provided."
    # Blank or whitespace-only text gives the model nothing to quiz on.
    if not pdf_content or not pdf_content.strip():
        return "Error: No document content to generate a quiz from."
    # NOTE(review): this sets a module-wide key, so concurrent requests share it.
    openai.api_key = openai_api_key
    # Slicing already copes with content shorter than the limit.
    limited_content = pdf_content[:max_chars]
    prompt = f"""Based on the following document content, generate {num_questions} multiple-choice quiz questions.
For each question:
1. Create a clear question based on key concepts in the document
2. Provide 4 options (A, B, C, D)
3. Indicate the correct answer
4. Briefly explain the correct answer
Document content:
{limited_content}
"""
    try:
        # NOTE(review): ``openai.ChatCompletion`` is the pre-1.0 interface of the
        # openai package - confirm the pinned version still provides it.
        response = openai.ChatCompletion.create(
            model=model_choice,
            messages=[{"role": "user", "content": prompt}],
        )
        return response.choices[0].message.content
    except Exception as e:  # UI boundary: report the problem as text
        return f"Error generating quiz: {str(e)}"
+# ---------- Image Processing ----------
def generate_image_response(input_text, image, openai_api_key, model_choice):
    """Send an image, plus an optional question, to a chat model.

    Args:
        input_text: Question about the image; may be empty or ``None``.
        image: Image object providing ``save(fp, format=...)`` (the UI
            supplies a PIL image).
        openai_api_key: OpenAI API key used for the request.
        model_choice: Name of the chat model to call.

    Returns:
        The model's reply text, or a string starting with ``"Error"`` when
        the key or image is missing, the image cannot be encoded, or the
        request fails.
    """
    if not openai_api_key:
        return "Error: No API key provided."
    if image is None:
        return "Error: No image provided."
    try:
        # Encoding sits inside the try so an image that cannot be written as
        # PNG is reported as an error message instead of raising.
        buffered = io.BytesIO()
        image.save(buffered, format="PNG")
        base64_str = base64.b64encode(buffered.getvalue()).decode("utf-8")

        # Leave the text part out when there is no question, rather than
        # sending an empty or None text entry.
        content = []
        if input_text:
            content.append({"type": "text", "text": input_text})
        content.append(
            {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_str}"}}
        )

        # NOTE(review): this sets a module-wide key, so concurrent requests share it.
        openai.api_key = openai_api_key
        response = openai.ChatCompletion.create(
            model=model_choice,
            messages=[{"role": "user", "content": content}],
        )
        return response.choices[0].message.content
    except Exception as e:  # UI boundary: report the problem as text
        return f"Error processing image: {str(e)}"
+# ---------- Voice Processing ----------
def process_voice_input(audio_path, openai_api_key, model_choice):
    """Transcribe an audio file and answer the transcript with a chat model.

    Args:
        audio_path: Path of the audio file to transcribe.
        openai_api_key: OpenAI API key used for both requests.
        model_choice: Name of the chat model that answers the transcript.
            Transcription itself always uses ``"whisper-1"``.

    Returns:
        A ``(reply, transcript)`` pair. Every failure path, including a
        missing API key, also returns a pair: an error message and an
        empty transcript, so callers can always unpack two values.
    """
    if not openai_api_key:
        # Must be a pair like every other return; the caller unpacks two values.
        return "Error: No API key provided.", ""
    try:
        openai.api_key = openai_api_key
        # ``with`` closes the file even when the transcription call raises.
        with open(audio_path, "rb") as audio_file:
            transcript = openai.Audio.transcribe("whisper-1", audio_file)
        prompt = transcript["text"]
        response = openai.ChatCompletion.create(
            model=model_choice,
            messages=[{"role": "user", "content": prompt}],
        )
        return response.choices[0].message.content, prompt
    except Exception as e:  # UI boundary: report the problem as text
        return f"Error processing voice: {str(e)}", ""
+# ---------- Unified Chatbot Handler ----------
def chatbot(input_text, image, pdf_file, audio_file, openai_api_key, model_choice, pdf_content, num_quiz_questions, pdf_quiz_mode, audio_mode, history):
    """Route one submission to the quiz, audio, image or text handler.

    Args:
        input_text: Typed question (image/text mode).
        image: Uploaded image, or ``None``.
        pdf_file: Newly uploaded PDF, or ``None``.
        audio_file: Path of the uploaded/recorded audio, or ``None``.
        openai_api_key: OpenAI API key forwarded to the handlers.
        model_choice: Name of the chat model to call.
        pdf_content: PDF text kept from an earlier upload.
        num_quiz_questions: Requested quiz length; converted with ``int``.
        pdf_quiz_mode: Truthy when PDF quiz mode is active.
        audio_mode: Truthy when audio mode is active.
        history: Existing chat turns as ``(user, bot)`` pairs, or ``None``.

    Returns:
        A 6-tuple ``("", None, None, None, pdf_text, history)``: cleared
        values for the text box and the three uploads, the PDF text to
        keep, and a new history list with this turn appended.
    """
    # Copy so the list handed in by the caller is never mutated in place.
    history = list(history) if history else []
    cleared = ("", None, None, None)
    new_pdf_content = pdf_content

    if pdf_file is not None:
        extracted = extract_text_from_pdf(pdf_file)
        # The extractor reports failures in-band; surface them to the user
        # instead of caching the message and quizzing on it as document text.
        if extracted.startswith("Error extracting text from PDF:"):
            history.append(("👤: [PDF Upload]", f"🤖: {extracted}"))
            return (*cleared, new_pdf_content, history)
        new_pdf_content = extracted

    if pdf_quiz_mode:
        if new_pdf_content:
            quiz_response = generate_mcq_quiz(new_pdf_content, int(num_quiz_questions), openai_api_key, model_choice)
            history.append((f"👤: [PDF Quiz - {num_quiz_questions} questions]", f"🤖: {quiz_response}"))
        else:
            history.append(("👤: [PDF Quiz]", "🤖: Please upload a PDF file first."))
    elif audio_mode:
        if audio_file is not None:
            response, transcribed_text = process_voice_input(audio_file, openai_api_key, model_choice)
            history.append((f"👤 (Voice): {transcribed_text}", f"🤖: {response}"))
        else:
            history.append(("👤: [Audio]", "🤖: Please upload or record an audio file."))
    elif image is not None:
        response = generate_image_response(input_text, image, openai_api_key, model_choice)
        history.append((f"👤: {input_text or '[Image]'}", f"🤖: {response}"))
    elif input_text:
        # Text-only question: same missing-key message as the other modes.
        if not openai_api_key:
            reply = "Error: No API key provided."
        else:
            try:
                openai.api_key = openai_api_key
                response = openai.ChatCompletion.create(
                    model=model_choice,
                    messages=[{"role": "user", "content": input_text}],
                )
                reply = response.choices[0].message.content
            except Exception as e:  # UI boundary: report the problem as text
                reply = f"Error: {str(e)}"
        history.append((f"👤: {input_text}", f"🤖: {reply}"))
    else:
        # Nothing to act on: say so rather than silently clearing the inputs.
        history.append(("👤: [Empty]", "🤖: Please enter a question or upload an image."))

    return (*cleared, new_pdf_content, history)
+# ---------- Clear Chat ----------
def clear_history():
    """Return reset values for every component the clear button updates.

    Returns:
        A 6-tuple: empty question text, ``None`` for the image, PDF and
        audio uploads, empty cached PDF text, and a fresh empty chat log.
    """
    blank_text = ""
    no_uploads = (None, None, None)
    blank_pdf_text = ""
    empty_chat = []
    return (blank_text, *no_uploads, blank_pdf_text, empty_chat)
+# ---------- Input Type Toggle ----------
def update_input_type(choice):
    """Build the component updates for the selected input type.

    Args:
        choice: Selected radio value: ``"Image"``, ``"PDF(QUIZ)"`` or
            ``"Audio"``.

    Returns:
        A tuple of eight ``gr.update`` objects, in order: question box,
        image upload, PDF upload, audio upload, quiz slider, PDF-quiz flag,
        audio flag and the hint panel. An unrecognised choice yields eight
        no-op updates so the UI is left unchanged.
    """
    hints = {
        "Image": """
        💡 **Image Mode Tips:**
        - Both **o1** and **o3-mini** support image analysis
        - o1 provides more detailed analysis but costs more
        - o3-mini is faster and more cost-effective for simple image questions
        """,
        "PDF(QUIZ)": """
        📚 **PDF Quiz Mode Tips:**
        - Both models can generate quizzes from PDF content
        - o1 creates more comprehensive and detailed questions
        - o3-mini generates quizzes faster with good quality
        - Large PDFs are automatically limited to first 8000 characters
        """,
        "Audio": """
        🎤 **Audio Mode Tips:**
        - **Important:** Audio transcription uses OpenAI's `whisper-1` model (separate cost)
        - **gpt-4 transcribe**: More sophisticated responses but higher cost per token
        - **gpt-4-mini-transcribe**: Cost-effective for most audio conversations
        - Supports common audio formats (MP3, WAV, M4A, etc.)
        - Maximum audio file size: 25MB
        """,
    }
    hint_text = hints.get(choice)
    if hint_text is None:
        # Unknown value: return one no-op update per output instead of None.
        return tuple(gr.update() for _ in range(8))

    # Each mode shows only its own inputs and sets only its own flag.
    is_image = choice == "Image"
    is_pdf = choice == "PDF(QUIZ)"
    is_audio = choice == "Audio"
    return (
        gr.update(visible=is_image),  # input_text
        gr.update(visible=is_image),  # image_input
        gr.update(visible=is_pdf),    # pdf_input
        gr.update(visible=is_audio),  # audio_input
        gr.update(visible=is_pdf),    # quiz_slider
        gr.update(value=is_pdf),      # pdf_quiz_mode
        gr.update(value=is_audio),    # audio_mode
        gr.update(value=hint_text, visible=True),  # model_hint
    )
+# ---------- CSS Styling ----------
custom_css = (
    # Page-wide font and background colour.
    "\n"
    ".gradio-container {\n"
    "    font-family: 'Arial', sans-serif;\n"
    "    background-color: #f0f4f8;\n"
    "}\n"
    # Purple gradient banner used by the title markup.
    ".gradio-header {\n"
    "    background: linear-gradient(135deg, #4a00e0 0%, #8e2de2 100%);\n"
    "    color: white;\n"
    "    padding: 20px;\n"
    "    border-radius: 8px;\n"
    "    text-align: center;\n"
    "}\n"
    # Submit button: same purple gradient as the banner.
    "#submit-btn {\n"
    "    background: linear-gradient(135deg, #4a00e0 0%, #8e2de2 100%);\n"
    "    color: white;\n"
    "    border-radius: 8px;\n"
    "}\n"
    # Clear-history button: red gradient.
    "#clear-history {\n"
    "    background: linear-gradient(135deg, #e53e3e 0%, #f56565 100%);\n"
    "    color: white;\n"
    "    border-radius: 8px;\n"
    "}\n"
)
+# ---------- UI Interface ----------
def create_interface():
    """Assemble the Gradio Blocks UI and wire up its event handlers.

    Returns:
        The ``gr.Blocks`` app. It is only built here, not launched.
    """
    # Static markup, kept apart from the layout code below.
    header_html = """
            <div class="gradio-header">
                <h1>Multimodal Chatbot (Image + PDF Quiz + Voice)</h1>
                <h3>Ask via image, PDF, or voice</h3>
            </div>
        """
    instructions_md = """
                - **Image Chat**: Upload an image and ask about it
                - **PDF Quiz**: Upload a PDF and generate MCQs
                - **Audio Chat**: Upload or record audio to chat
                - Always provide your OpenAI API key
            """

    with gr.Blocks(css=custom_css) as demo:
        gr.Markdown(header_html)
        with gr.Accordion("Instructions", open=False):
            gr.Markdown(instructions_md)

        # Extracted PDF text carried between submissions.
        pdf_content = gr.State("")

        with gr.Row():
            openai_api_key = gr.Textbox(label="OpenAI API Key", type="password", placeholder="sk-...")
        with gr.Row():
            input_type = gr.Radio(["Image", "PDF(QUIZ)", "Audio"], label="Input Type", value="Image")

        # Hint panel; filled in and revealed by the input-type handler.
        model_hint = gr.Markdown("", visible=False)

        # All inputs share one row; only the image-mode pair starts visible.
        with gr.Row():
            input_text = gr.Textbox(label="Question (for images)", visible=True)
            image_input = gr.Image(label="Upload Image", type="pil", visible=True)
            pdf_input = gr.File(label="Upload PDF", visible=False)
            audio_input = gr.Audio(label="Upload/Record Audio", type="filepath", visible=False)
            quiz_slider = gr.Slider(1, 20, value=5, step=1, label="Number of Questions", visible=False)

        # Invisible checkboxes that carry the active mode into the submit handler.
        pdf_quiz_mode = gr.Checkbox(visible=False, value=False)
        audio_mode = gr.Checkbox(visible=False, value=False)

        with gr.Row():
            # NOTE(review): the transcription entries are passed to the
            # chat-completion calls like any other choice - confirm that
            # they are meant to be selectable here.
            model_choice = gr.Dropdown(
                ["o1", "o3-mini", "whisper-1", "gpt-4o-mini-transcribe", "gpt-4o-transcribe"],
                label="Model",
                value="o1",
            )
            submit_btn = gr.Button("Submit", elem_id="submit-btn")
            clear_btn = gr.Button("Clear History", elem_id="clear-history")

        chat_history = gr.Chatbot()

        # Submit and clear both write back to the same six components.
        refreshed = [input_text, image_input, pdf_input, audio_input, pdf_content, chat_history]

        input_type.change(
            fn=update_input_type,
            inputs=[input_type],
            outputs=[
                input_text,
                image_input,
                pdf_input,
                audio_input,
                quiz_slider,
                pdf_quiz_mode,
                audio_mode,
                model_hint,
            ],
        )
        submit_btn.click(
            fn=chatbot,
            inputs=[
                input_text,
                image_input,
                pdf_input,
                audio_input,
                openai_api_key,
                model_choice,
                pdf_content,
                quiz_slider,
                pdf_quiz_mode,
                audio_mode,
                chat_history,
            ],
            outputs=list(refreshed),
        )
        clear_btn.click(fn=clear_history, outputs=list(refreshed))

    return demo
+# ---------- Launch ----------
if __name__ == "__main__":
    # Script entry point: build the Blocks app, then start serving it with
    # Gradio's default launch settings.
    demo = create_interface()
    demo.launch()