import gradio as gr
import openai
import base64
from PIL import Image
import io
import fitz  # PyMuPDF (installed via the "pymupdf" package)
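# NOTE (assumption about the environment): this file uses the pre-1.0 openai
# SDK surface (openai.ChatCompletion, openai.Audio), which was removed in
# openai>=1.0. Pinning the dependency, e.g. `pip install "openai<1.0"`,
# keeps these calls working as written.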
# ---------- PDF Text Extraction ----------
def extract_text_from_pdf(pdf_file):
    """Extract plain text from every page of the uploaded PDF."""
    try:
        # gr.File may deliver a filepath string or a file-like object with
        # .name, depending on the Gradio version; handle both.
        path = pdf_file.name if hasattr(pdf_file, "name") else pdf_file
        text = ""
        pdf_document = fitz.open(path)
        for page in pdf_document:
            text += page.get_text()
        pdf_document.close()
        return text
    except Exception as e:
        return f"Error extracting text from PDF: {str(e)}"
# ---------- PDF Quiz Generation ----------
def generate_mcq_quiz(pdf_content, num_questions, openai_api_key, model_choice):
    """Ask the chat model to write multiple-choice questions from the PDF text."""
    if not openai_api_key:
        return "Error: No API key provided."
    openai.api_key = openai_api_key
    # Truncate long documents so the prompt stays well inside the context window
    limited_content = pdf_content[:8000]
    prompt = f"""Based on the following document content, generate {num_questions} multiple-choice quiz questions.
For each question:
1. Create a clear question based on key concepts in the document
2. Provide 4 options (A, B, C, D)
3. Indicate the correct answer
4. Briefly explain the correct answer
Document content:
{limited_content}
"""
    try:
        response = openai.ChatCompletion.create(
            model=model_choice,
            messages=[{"role": "user", "content": prompt}]
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"Error generating quiz: {str(e)}"
# ---------- Image Processing ----------
def generate_image_response(input_text, image, openai_api_key, model_choice):
    """Send the user's question plus the uploaded image to a vision-capable chat model."""
    if not openai_api_key:
        return "Error: No API key provided."
    openai.api_key = openai_api_key
    # Encode the PIL image as a base64 PNG so it can be inlined as a data URL
    buffered = io.BytesIO()
    image.save(buffered, format="PNG")
    base64_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
    try:
        response = openai.ChatCompletion.create(
            model=model_choice,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": input_text},
                        {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_str}"}}
                    ]
                }
            ]
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"Error processing image: {str(e)}"
# ---------- Voice Processing ----------
def process_voice_input(audio_path, openai_api_key, model_choice):
    """Transcribe the audio with Whisper, then answer the transcript with the chat model."""
    if not openai_api_key:
        # Callers unpack (response, transcript), so always return a 2-tuple
        return "Error: No API key provided.", ""
    try:
        openai.api_key = openai_api_key
        # Transcription is pinned to whisper-1; model_choice only drives the reply
        with open(audio_path, "rb") as audio_file:
            transcript = openai.Audio.transcribe("whisper-1", audio_file)
        prompt = transcript["text"]
        response = openai.ChatCompletion.create(
            model=model_choice,
            messages=[{"role": "user", "content": prompt}]
        )
        return response.choices[0].message.content, prompt
    except Exception as e:
        return f"Error processing voice: {str(e)}", ""
# ---------- Unified Chatbot Handler ----------
def chatbot(input_text, image, pdf_file, audio_file, openai_api_key, model_choice, pdf_content, num_quiz_questions, pdf_quiz_mode, audio_mode, history):
    """Route each submission to the quiz, audio, image, or plain-text path."""
    if history is None:
        history = []
    new_pdf_content = pdf_content
    # Handle PDF file upload and extract text
    if pdf_file is not None:
        new_pdf_content = extract_text_from_pdf(pdf_file)
    # Handle PDF Quiz Mode
    if pdf_quiz_mode:
        if new_pdf_content:
            quiz_response = generate_mcq_quiz(new_pdf_content, int(num_quiz_questions), openai_api_key, model_choice)
            history.append((f"👤: [PDF Quiz - {num_quiz_questions} questions]", f"🤖: {quiz_response}"))
        else:
            history.append(("👤: [PDF Quiz]", "🤖: Please upload a PDF file first."))
    # Handle Audio Mode
    elif audio_mode:
        if audio_file is not None:
            response, transcribed_text = process_voice_input(audio_file, openai_api_key, model_choice)
            history.append((f"👤 (Voice): {transcribed_text}", f"🤖: {response}"))
        else:
            history.append(("👤: [Audio]", "🤖: Please upload or record an audio file."))
    # Handle Image Mode (default)
    else:
        if image is not None:
            response = generate_image_response(input_text, image, openai_api_key, model_choice)
            history.append((f"👤: {input_text or '[Image]'}", f"🤖: {response}"))
        elif input_text:
            # Text-only fallback when no image is provided
            try:
                openai.api_key = openai_api_key
                response = openai.ChatCompletion.create(
                    model=model_choice,
                    messages=[{"role": "user", "content": input_text}]
                )
                history.append((f"👤: {input_text}", f"🤖: {response.choices[0].message.content}"))
            except Exception as e:
                history.append((f"👤: {input_text}", f"🤖: Error: {str(e)}"))
    return "", None, None, None, new_pdf_content, history
# ---------- Clear Chat ----------
def clear_history():
    return "", None, None, None, "", []
# ---------- Input Type Toggle ----------
def update_input_type(choice):
    if choice == "Image":
        hint_text = """
💡 **Image Mode Tips:**
- Both **o1** and **o3-mini** support image analysis
- o1 provides more detailed analysis but costs more
- o3-mini is faster and more cost-effective for simple image questions
"""
        return (
            gr.update(visible=True),   # input_text
            gr.update(visible=True),   # image_input
            gr.update(visible=False),  # pdf_input
            gr.update(visible=False),  # audio_input
            gr.update(visible=False),  # quiz_slider
            gr.update(value=False),    # pdf_quiz_mode
            gr.update(value=False),    # audio_mode
            gr.update(value=hint_text, visible=True)  # model_hint
        )
    elif choice == "PDF(QUIZ)":
        hint_text = """
📚 **PDF Quiz Mode Tips:**
- Both models can generate quizzes from PDF content
- o1 creates more comprehensive and detailed questions
- o3-mini generates quizzes faster with good quality
- Large PDFs are automatically limited to the first 8000 characters
"""
        return (
            gr.update(visible=False),  # input_text
            gr.update(visible=False),  # image_input
            gr.update(visible=True),   # pdf_input
            gr.update(visible=False),  # audio_input
            gr.update(visible=True),   # quiz_slider
            gr.update(value=True),     # pdf_quiz_mode
            gr.update(value=False),    # audio_mode
            gr.update(value=hint_text, visible=True)  # model_hint
        )
    elif choice == "Audio":
        hint_text = """
🎤 **Audio Mode Tips:**
- **Important:** transcription always uses OpenAI's `whisper-1` model (billed separately)
- The selected chat model then answers the transcribed text
- **gpt-4o-transcribe**: more sophisticated responses but higher cost per token
- **gpt-4o-mini-transcribe**: cost-effective for most audio conversations
- Supports common audio formats (MP3, WAV, M4A, etc.)
- Maximum audio file size: 25MB
"""
        return (
            gr.update(visible=False),  # input_text
            gr.update(visible=False),  # image_input
            gr.update(visible=False),  # pdf_input
            gr.update(visible=True),   # audio_input
            gr.update(visible=False),  # quiz_slider
            gr.update(value=False),    # pdf_quiz_mode
            gr.update(value=True),     # audio_mode
            gr.update(value=hint_text, visible=True)  # model_hint
        )
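# Each branch above returns an 8-tuple whose order must match the `outputs`
# list wired to input_type.change() in create_interface() below.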
# ---------- CSS Styling ----------
custom_css = """
.gradio-container {
font-family: 'Arial', sans-serif;
background-color: #f0f4f8;
}
.gradio-header {
background: linear-gradient(135deg, #4a00e0 0%, #8e2de2 100%);
color: white;
padding: 20px;
border-radius: 8px;
text-align: center;
}
#submit-btn {
background: linear-gradient(135deg, #4a00e0 0%, #8e2de2 100%);
color: white;
border-radius: 8px;
}
#clear-history {
background: linear-gradient(135deg, #e53e3e 0%, #f56565 100%);
color: white;
border-radius: 8px;
}
"""
# ---------- UI Interface ----------
def create_interface():
    with gr.Blocks(css=custom_css) as demo:
        gr.Markdown("""
<div class="gradio-header">
    <h1>Multimodal Chatbot (Image + PDF Quiz + Voice)</h1>
    <h3>Ask via image, PDF, or voice</h3>
</div>
""")
        with gr.Accordion("Instructions", open=False):
            gr.Markdown("""
- **Image Chat**: Upload an image and ask about it
- **PDF Quiz**: Upload a PDF and generate MCQs
- **Audio Chat**: Upload or record audio to chat
- Always provide your OpenAI API key
""")
        # State variables
        pdf_content = gr.State("")
        with gr.Row():
            openai_api_key = gr.Textbox(label="OpenAI API Key", type="password", placeholder="sk-...")
        with gr.Row():
            input_type = gr.Radio(["Image", "PDF(QUIZ)", "Audio"], label="Input Type", value="Image")
        # Model-specific hints that appear based on input type
        model_hint = gr.Markdown("", visible=False)
        # Input components - all in one organized row
        with gr.Row():
            input_text = gr.Textbox(label="Question (for images)", visible=True)
            image_input = gr.Image(label="Upload Image", type="pil", visible=True)
            pdf_input = gr.File(label="Upload PDF", visible=False)
            audio_input = gr.Audio(label="Upload/Record Audio", type="filepath", visible=False)
            quiz_slider = gr.Slider(1, 20, value=5, step=1, label="Number of Questions", visible=False)
        # Hidden state components for mode control
        pdf_quiz_mode = gr.Checkbox(visible=False, value=False)
        audio_mode = gr.Checkbox(visible=False, value=False)
        with gr.Row():
            # model_choice is only ever sent to ChatCompletion; transcription
            # itself is pinned to whisper-1 inside process_voice_input
            model_choice = gr.Dropdown(
                ["o1", "o3-mini", "whisper-1", "gpt-4o-mini-transcribe", "gpt-4o-transcribe"],
                label="Model", value="o1"
            )
            submit_btn = gr.Button("Submit", elem_id="submit-btn")
            clear_btn = gr.Button("Clear History", elem_id="clear-history")
        chat_history = gr.Chatbot()
        # Event handlers
        input_type.change(
            update_input_type,
            inputs=[input_type],
            outputs=[input_text, image_input, pdf_input, audio_input, quiz_slider, pdf_quiz_mode, audio_mode, model_hint]
        )
        submit_btn.click(
            chatbot,
            inputs=[input_text, image_input, pdf_input, audio_input, openai_api_key, model_choice,
                    pdf_content, quiz_slider, pdf_quiz_mode, audio_mode, chat_history],
            outputs=[input_text, image_input, pdf_input, audio_input, pdf_content, chat_history]
        )
        clear_btn.click(
            clear_history,
            outputs=[input_text, image_input, pdf_input, audio_input, pdf_content, chat_history]
        )
    return demo
# ---------- Launch ----------
if __name__ == "__main__":
    demo = create_interface()
    demo.launch()
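# To run locally (assumed environment; see the SDK note near the imports):
#   pip install gradio "openai<1.0" pymupdf
#   python app.py
# Gradio then serves the UI on a local URL such as http://127.0.0.1:7860.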