Spaces:

Nid4l
/

Multi-Input_Academic_Summarizer

Sleeping

App Files Files Community

Multi-Input_Academic_Summarizer / app.py

Nid4l

Update app.py

9843823 verified about 1 year ago

Raw

History Blame Contribute Delete

29.6 kB

	import gradio as gr
	import google.generativeai as genai
	import pdfplumber
	import docx
	import os
	import tempfile
	from faster_whisper import WhisperModel
	import yt_dlp
	import subprocess
	import markdown2
	from bs4 import BeautifulSoup
	from docx import Document
	from docx.shared import Inches
	from docx.oxml.ns import qn
	from docx.oxml import OxmlElement
	import re
	from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
	from reportlab.lib.pagesizes import A4
	from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
	from reportlab.lib.units import cm
	from reportlab.lib.enums import TA_RIGHT
	from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer

	# --- API Key Configuration with Fallback ---
	# Environment variables that may hold a Google API key, highest priority first.
	API_KEY_ENV_VARS = ["GOOGLE_API_KEY", "GOOGLE_API_KEY_SECONDARY", "ANOTHER_GOOGLE_API_KEY"]
	selected_api_key = None

	# Walk the candidates in order; the first non-empty value wins and configures the SDK.
	for env_var in API_KEY_ENV_VARS:
	    key = os.environ.get(env_var)
	    if not key:
	        continue
	    selected_api_key = key
	    print(f"Using API key from {env_var}")
	    genai.configure(api_key=selected_api_key)
	    break
	else:
	    # Loop finished without a break: no key was found. The app still starts,
	    # but API calls will likely fail until a key is provided.
	    print("Warning: No Google API key found. Please set GOOGLE_API_KEY or other fallback keys.")

	# Shared model handles used by the summarizer and the evaluator.
	model_2p0 = genai.GenerativeModel("gemini-2.0-flash")
	model_2p5 = genai.GenerativeModel("gemini-2.5-flash-preview-05-20")

	def check_input_length(text, max_chars=80000):
	    """Check that *text* does not exceed *max_chars* characters.

	    Returns a ``(is_valid, message)`` tuple: ``(True, "")`` when the text fits,
	    otherwise ``(False, warning)`` where the warning reports the actual length.
	    """
	    char_count = len(text)
	    if char_count <= max_chars:
	        return True, ""
	    warning = f"⚠️ Input too long ({char_count} chars). To avoid API errors, trim it down or split into smaller parts."
	    return False, warning

	def summarize_text(text, tone, length, language, generation_settings, model_choice):
	    """Summarize *text* with Gemini according to the selected tone, length and language.

	    Args:
	        text: The source text to summarize.
	        tone: One of "Gen Z", "Professional", "Balanced" (unknown values fall back to "Balanced").
	        length: One of "Short", "Medium", "Long" (unknown values fall back to "Medium").
	        language: One of "English", "Arabic", "Both" (unknown values fall back to "English").
	        generation_settings: Generation config forwarded to ``generate_content``.
	        model_choice: "Flash 2.0" or "Flash 2.5" to force a model; any other value
	            tries Flash 2.0 first and falls back to Flash 2.5.

	    Returns:
	        The summary text, or a message starting with "❌" when generation failed.
	        This function never raises for API errors.
	    """
	    length_prompts = {
	        "Short": "Keep it concise. Aim for 3–5 bullet points or a quick overview.",
	        "Medium": "Provide a balanced summary with key ideas and some supporting details.",
	        "Long": "Write a detailed summary covering all major points with explanations."
	    }

	    tone_prompts = {
	        "Gen Z": "Summarize this in a chill, Gen Z-friendly tone. Use slang and emojis if needed.",
	        "Professional": "Summarize this in a clear, academic, professional tone.",
	        "Balanced": "Summarize this in a simple, neutral tone, not too formal or too casual."
	    }

	    lang_prompt = {
	        "English": "Output the summary in English. USE this clear summary header '## English Summary'",
	        "Arabic": "Output the summary directly in Arabic, do not write it in English then translate it. USE this clear summary header '## Arabic Summary'",
	        "Both": "First write the summary in English, then repeat it in Arabic. USE clear section headers like '## {language} Summary'."
	    }

	    # Fall back to the defaults for unknown choices so the prompt never contains
	    # the literal string "None".
	    tone_part = tone_prompts.get(tone, tone_prompts["Balanced"])
	    length_part = length_prompts.get(length, length_prompts["Medium"])
	    language_part = lang_prompt.get(language, lang_prompt["English"])
	    prompt = f"{tone_part} {length_part} {language_part}\n\n{text}"

	    def first_error_line(error):
	        # str(error) can be empty; indexing [0] on its lines would then raise
	        # IndexError from inside the exception handler.
	        lines = str(error).splitlines()
	        return lines[0] if lines else type(error).__name__

	    if model_choice in ("Flash 2.0", "Flash 2.5"):
	        model = model_2p0 if model_choice == "Flash 2.0" else model_2p5
	        try:
	            return model.generate_content(prompt, generation_config=generation_settings).text
	        except Exception as e:
	            return f"❌ {model_choice} failed.\n\n🛠 Error: `{first_error_line(e)}`\n\nTry again or choose another model."

	    # Auto fallback (2.0 -> 2.5) to save the limited 2.5 quota.
	    for model in (model_2p0, model_2p5):
	        try:
	            return model.generate_content(prompt, generation_config=generation_settings).text
	        except Exception as e:
	            # Keep a trace in the logs instead of swallowing the failure silently.
	            print(f"Gemini call failed in auto mode: {first_error_line(e)}")
	    return "❌ Both Gemini models failed. Please try again later or reduce input size."

	# Module-level faster-whisper "base" model, created once at import time and
	# used by transcribe_audio() for every audio/video transcription.
	whisper_model = WhisperModel("base")

	def extract_text_from_pdf(file_path):  # to handle PDFs
	    """Return the text of every page of the PDF at *file_path*, joined by newlines.

	    Pages for which ``extract_text()`` returns nothing (None or an empty string)
	    are skipped. Each page is extracted exactly once.
	    """
	    with pdfplumber.open(file_path) as pdf:
	        # Lazy generator: extract_text() runs once per page instead of once for
	        # the filter and a second time for the joined value.
	        page_texts = (page.extract_text() for page in pdf.pages)
	        return "\n".join(page_text for page_text in page_texts if page_text)

	def extract_text_from_docx(file_path):  # to handle DOCXs
	    """Return the text of all paragraphs in the DOCX at *file_path*, one per line."""
	    document = docx.Document(file_path)
	    paragraph_texts = (paragraph.text for paragraph in document.paragraphs)
	    return "\n".join(paragraph_texts)

	def transcribe_audio(file_path):  # to handle audio and video files
	    """Transcribe the media file at *file_path* with the shared Whisper model.

	    Returns the text of all transcribed segments joined by single spaces.
	    """
	    segments, _info = whisper_model.transcribe(file_path)
	    pieces = (segment.text for segment in segments)
	    return " ".join(pieces)

	def download_youtube_audio(youtube_url):  # for YouTube videos using their links
	    """Download the audio of *youtube_url* as MP3 into the system temp directory.

	    Returns the path ``<tempdir>/yt_audio.mp3`` that the FFmpeg post-processor
	    is configured to produce.
	    """
	    temp_root = tempfile.gettempdir()
	    # Post-processor that converts whatever was downloaded into a 192 kbps MP3.
	    to_mp3 = {
	        "key": "FFmpegExtractAudio",
	        "preferredcodec": "mp3",
	        "preferredquality": "192"
	    }
	    options = {
	        "format": "bestaudio/best",
	        "outtmpl": os.path.join(temp_root, "yt_audio.%(ext)s"),
	        "postprocessors": [to_mp3],
	        "quiet": True
	    }
	    with yt_dlp.YoutubeDL(options) as downloader:
	        downloader.download([youtube_url])
	    return os.path.join(temp_root, "yt_audio.mp3")

	def handle_input_file(file, _):  # aside from the YT links, this function hands every other file over to the appropriate extractor
	    """Extract text from an uploaded file based on its extension.

	    *file* must expose the file path as ``file.name``; the second argument is
	    ignored. PDFs and DOCX files are parsed, audio/video files are transcribed,
	    and any other extension yields the "❌ Unsupported file format!" marker.
	    """
	    path = file.name
	    # Text after the last dot, lower-cased (the whole path when there is no dot).
	    extension = path.rsplit(".", 1)[-1].lower()

	    if extension == "pdf":
	        return extract_text_from_pdf(path)
	    if extension == "docx":
	        return extract_text_from_docx(path)
	    if extension in ("mp3", "wav", "m4a", "mp4", "webm", "mov"):
	        return transcribe_audio(path)
	    return "❌ Unsupported file format!"

	def save_as_txt(summary, filepath):  # to generate a text file of the summary
	    """Write *summary* to *filepath* as UTF-8 text, replacing any existing file."""
	    with open(filepath, "w", encoding="utf-8") as handle:
	        handle.write(summary)

	def save_as_docx(summary, filepath):  # to generate a structured Document (docx) that retains format and style. (Tailored to our summarizing style)
	    """Render the Markdown *summary* into a Word document saved at *filepath*.

	    The Markdown is converted to HTML and the top-level HTML elements are mapped
	    to python-docx content: h1/h2 become headings, ul/ol become bulleted or
	    numbered paragraphs, p becomes a paragraph with bold/italic runs, and an
	    explicit ``<div dir="rtl">`` forces right-to-left on all of its children.
	    Anything after an "Arabic Summary" h2 (until an "English Summary" h2), or any
	    element containing Arabic characters, is formatted right-to-left.
	    """
	    doc = Document()
	    html = markdown2.markdown(summary, extras=["markdown-in-html", "tables"])
	    soup = BeautifulSoup(html, "html.parser")

	    def contains_arabic(text):  # Checks for Arabic content
	        if text is None:
	            return False
	        return bool(re.search(r'[\u0600-\u06FF]', text))

	    def apply_rtl_to_paragraph(para):
	        # NOTE(review): the original author reports that this does not fully fix
	        # RTL rendering; the XML written here is kept unchanged. Confirm whether
	        # w:textDirection="rl" is really wanted.
	        para.alignment = WD_PARAGRAPH_ALIGNMENT.RIGHT
	        pPr = para._element.get_or_add_pPr()

	        # Set BiDi (RTL) for paragraph
	        bidi_tag = pPr.find(qn('w:bidi'))
	        if bidi_tag is None:
	            bidi_tag = OxmlElement('w:bidi')
	            pPr.append(bidi_tag)
	        bidi_tag.set(qn('w:val'), "1")

	        # Set text direction to (RTL) Right-to-Left
	        text_direction = pPr.find(qn('w:textDirection'))
	        if text_direction is None:
	            text_direction = OxmlElement('w:textDirection')
	            pPr.append(text_direction)
	        text_direction.set(qn('w:val'), 'rl')

	    def apply_ltr_to_paragraph(para):
	        para.alignment = WD_PARAGRAPH_ALIGNMENT.LEFT
	        pPr = para._element.get_or_add_pPr()

	        # Switch an existing BiDi flag off (none is added if absent)
	        bidi_tag = pPr.find(qn('w:bidi'))
	        if bidi_tag is not None:
	            bidi_tag.set(qn('w:val'), "0")

	        # Drop any explicit text direction so the paragraph is plain LTR
	        text_direction = pPr.find(qn('w:textDirection'))
	        if text_direction is not None:
	            pPr.remove(text_direction)

	    def apply_direction(para, rtl):
	        # Single switch between the two direction helpers.
	        if rtl:
	            apply_rtl_to_paragraph(para)
	        else:
	            apply_ltr_to_paragraph(para)

	    def add_inline_runs(para, node, fallback_text):
	        # Copy the inline content of *node* into *para*, keeping bold/italic.
	        for child in node.contents:
	            if child.name is None:  # NavigableString (plain text)
	                para.add_run(str(child))
	            elif child.name in ('strong', 'b'):
	                para.add_run(child.get_text()).bold = True
	            elif child.name in ('em', 'i'):
	                para.add_run(child.get_text()).italic = True
	            elif child.name not in ('ul', 'ol'):
	                # Other inline markup (code spans, links, ...) keeps its text
	                # instead of being dropped. Nested lists are skipped because
	                # their <li> items are emitted separately by find_all("li").
	                para.add_run(child.get_text())
	        # Fallback when nothing visible was added although the node has text.
	        if not para.text.strip() and fallback_text:
	            para.add_run(fallback_text)

	    def add_list_items(list_element, style_name, force_rtl):
	        # One paragraph per <li>; RTL when forced or when the item has Arabic text.
	        for li in list_element.find_all("li"):
	            item_text = li.get_text().strip()
	            para = doc.add_paragraph(style=style_name)
	            add_inline_runs(para, li, item_text)
	            apply_direction(para, force_rtl or contains_arabic(item_text))

	    # Style *names* are used here; looking styles up by id ('ListBullet') is
	    # deprecated in python-docx.
	    bullet_style = 'List Bullet'
	    number_style = 'List Number'

	    in_arabic_section = False

	    for element in soup.contents:
	        if element.name is None:  # NavigableString (text node between elements)
	            loose_text = element.string.strip() if element.string else ""
	            if loose_text:
	                para = doc.add_paragraph(loose_text)
	                apply_direction(para, in_arabic_section or contains_arabic(loose_text))
	            continue

	        text_to_process = element.get_text().strip()

	        # State management for the Arabic section
	        if element.name == "h2":
	            if "Arabic Summary" in text_to_process:
	                in_arabic_section = True
	                apply_rtl_to_paragraph(doc.add_heading(text_to_process, level=2))
	                continue  # Header processed, move to next element
	            if "English Summary" in text_to_process:
	                in_arabic_section = False

	        # RTL applies inside the Arabic section or when the element has Arabic text
	        should_apply_rtl = in_arabic_section or contains_arabic(text_to_process)

	        if element.name in ("h1", "h2"):
	            heading_level = 1 if element.name == "h1" else 2
	            para = doc.add_heading(text_to_process, level=heading_level)
	            apply_direction(para, should_apply_rtl)
	        elif element.name == "ul":
	            add_list_items(element, bullet_style, in_arabic_section)
	        elif element.name == "ol":
	            add_list_items(element, number_style, in_arabic_section)
	        elif element.name == "p":
	            para = doc.add_paragraph()
	            add_inline_runs(para, element, text_to_process)
	            apply_direction(para, should_apply_rtl)
	        elif element.name == "div" and element.attrs.get("dir") == "rtl":
	            # Children of an explicit <div dir="rtl"> are always RTL,
	            # regardless of the current section state.
	            for child_element in element.children:
	                if child_element.name is None:  # Text node within the div
	                    child_string = child_element.string.strip() if child_element.string else ""
	                    if child_string:
	                        apply_rtl_to_paragraph(doc.add_paragraph(child_string))
	                    continue

	                child_text = child_element.get_text().strip()
	                if not child_text:
	                    continue  # Skip empty child elements

	                if child_element.name == "ul":
	                    add_list_items(child_element, bullet_style, True)
	                    continue
	                if child_element.name == "ol":
	                    add_list_items(child_element, number_style, True)
	                    continue

	                if child_element.name == "h2":
	                    child_para = doc.add_heading(child_text, level=2)
	                else:  # <p> and anything unrecognized is treated as a paragraph
	                    child_para = doc.add_paragraph()
	                    add_inline_runs(child_para, child_element, child_text)
	                apply_rtl_to_paragraph(child_para)
	        else:
	            # Fallback for other top-level elements not explicitly handled
	            if text_to_process:
	                para = doc.add_paragraph(text_to_process)
	                apply_direction(para, should_apply_rtl)

	    doc.save(filepath)

	def save_as_pdf(summary, filepath):
	    """Write *summary* to an A4 PDF at *filepath* with 2 cm margins.

	    The summary is split on blank lines. Blocks that mention "Arabic Summary" or
	    contain Arabic characters are right-aligned, blocks starting with "##" use
	    the Heading2 style, everything else uses the Normal style. Markdown bold
	    markers ("**") are removed from the text.
	    """
	    doc = SimpleDocTemplate(
	        filepath,
	        pagesize=A4,
	        rightMargin=2 * cm,
	        leftMargin=2 * cm,
	        topMargin=2 * cm,
	        bottomMargin=2 * cm,
	    )
	    styles = getSampleStyleSheet()
	    # Built once instead of once per paragraph.
	    arabic_style = ParagraphStyle('Arabic', parent=styles['Normal'], alignment=TA_RIGHT)
	    # NOTE(review): the sample stylesheet's default font presumably has no Arabic
	    # glyphs; confirm whether an Arabic-capable font must be registered.
	    story = []

	    for para in summary.split("\n\n"):
	        stripped = para.strip()
	        if "Arabic Summary" in para or any('\u0600' <= c <= '\u06FF' for c in para):
	            para_style = arabic_style
	        elif stripped.startswith("##"):
	            para_style = styles["Heading2"]
	        else:
	            para_style = styles["Normal"]

	        text = stripped.replace("**", "")
	        # Paragraph() parses its text as XML-like markup, so escape the special
	        # characters first ("&" must be replaced before "<" and ">").
	        text = text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
	        # Keep single line breaks (e.g. one bullet per line) instead of letting
	        # them collapse into one run of text.
	        text = text.replace("\n", "<br/>")

	        story.append(Paragraph(text, para_style))
	        story.append(Spacer(1, 12))

	    doc.build(story)

	def evaluate_summary_with_gemini(original_text, generated_summary):  # yup, we will use Gemini to rate Gemini!
	    """Ask Gemini Flash 2.0 to rate *generated_summary* and return a score from 1 to 5.

	    Two ratings are requested: faithfulness to *original_text* and
	    coherence/tone of the summary itself. The result is the rounded average of
	    both. A rating that cannot be obtained or parsed counts as 1.
	    """
	    # Flash 2.0 is used for evaluation (the original author cites its higher limits).
	    eval_model = model_2p0

	    # 1. Faithfulness/Relevance check
	    faithfulness_prompt = f"""
	Original Text: {original_text}

	Generated Summary: {generated_summary}

	Provide a 'Faithfulness Score' from 1 to 5, where 5 is perfectly faithful and 1 is very unfaithful.
	Output only the score as a single digit.
	"""
	    # 2. Readability (coherence and tone) check
	    readability_prompt = f"""
	Summary: {generated_summary}

	Provide a quality rating from 1 to 5 for coherence and tone, where 5 is excellent and 1 is poor.
	Output only the score as a single digit.
	"""

	    def request_score(prompt):
	        # Returns the first digit between 1 and 5 found in the reply, so answers
	        # such as "4/5" or "Score: 4" still count and the result stays on the
	        # 1-5 scale. Any API failure or unparsable reply yields the lowest score.
	        try:
	            reply = eval_model.generate_content(prompt).text
	        except Exception:
	            return 1
	        for char in reply:
	            if char in "12345":
	                return int(char)
	        return 1

	    faithfulness_score = request_score(faithfulness_prompt)
	    readability_score = request_score(readability_prompt)

	    overall_score = round((faithfulness_score + readability_score) / 2)  # Averaging the scores

	    return overall_score

	def create_progress_bar_html(score, max_score=5):  # the HTML visual for the score progress bar
	    """Return an HTML snippet showing *score* out of *max_score* as a colored bar.

	    The fill width is the score as a percentage of *max_score*. The fill color
	    moves linearly from red (0%) through yellow (50%) to green (100%), and the
	    label "score/max_score" is centered on top of the bar.
	    """
	    percentage = (score / max_score) * 100

	    if percentage > 50:
	        # Upper half: yellow (255,255,0) -> green (0,255,0), red channel fades out
	        r, g = int(255 * ((100 - percentage) / 50)), 255
	    else:
	        # Lower half: red (255,0,0) -> yellow (255,255,0), green channel fades in
	        r, g = 255, int(255 * (percentage / 50))
	    b = 0

	    color = f"rgb({r},{g},{b})"

	    # Dark grey track, colored fill, and a centered text badge
	    return f"""
	<div style="background-color: #333; border-radius: 5px; height: 25px; width: 100%; overflow: hidden; margin-top: 10px; position: relative;">
	<div style="background: {color}; height: 100%; width: {percentage}%; border-radius: 5px; transition: width 0.5s ease-in-out;"></div>
	<div style="position: absolute; top: 0; left: 0; width: 100%; height: 100%; display: flex; align-items: center; justify-content: center;">
	<span style="font-weight: 800; color: white; background-color: rgba(0,0,0,0.7); padding: 0 6px; border-radius: 10px; font-size: 14px;">
	{score}/{max_score}
	</span>
	</div>
	</div>
	"""

	with gr.Blocks() as UI:
	    gr.Markdown("## 🤖💬 Multi-Input Academic Summarizer")

	    chat_state = gr.State([])  # kept for compatibility; history is read from the chatbot itself
	    extracted_text_state = gr.State("")  # stored for re-evaluation

	    # 1️⃣ Input Block
	    gr.Markdown("### 📂 Input Options")
	    with gr.Row():
	        with gr.Column(scale=1):
	            text_input = gr.Textbox(label="📝 Paste Text", lines=4, placeholder="Paste your text here...")
	            clear_text = gr.Button("🧹 Clear Text", scale=0)  # scale=0 for minimal button space
	        with gr.Column(scale=1):
	            file_input = gr.File(label="📎 Upload File")
	        with gr.Column(scale=1):
	            yt_input = gr.Textbox(label="🔗 YouTube Link", placeholder="http://youtube.com/watch?v=...")
	            clear_yt = gr.Button("❌ Clear YouTube Link", scale=0)

	    # 2️⃣ Settings Block
	    gr.Markdown("### ⚙️ Summary Settings")
	    with gr.Row():
	        tone = gr.Radio(["Gen Z", "Balanced", "Professional"], label="🎭 Tone", value="Balanced")
	        length = gr.Radio(["Short", "Medium", "Long"], label="📏 Length", value="Medium")
	        creativity = gr.Radio(["Low", "Medium", "High"], label="🎨 Creativity Level", value="Medium")

	    with gr.Row():
	        language = gr.Radio(["English", "Arabic", "Both"], label="🌐 Output Language", value="English")
	        model_choice = gr.Radio(["Auto (2.0 → 2.5)", "Flash 2.0", "Flash 2.5"], label="🤖 Gemini Model", value="Auto (2.0 → 2.5)")
	        file_formats = gr.CheckboxGroup(
	            choices=["TXT", "PDF", "DOCX"],
	            value=["TXT", "PDF", "DOCX"],  # Default: all selected
	            label="📁 Output Formats",
	            interactive=True
	        )

	    # 3️⃣ Submit Button
	    submit_btn = gr.Button("🚀 Summarize")

	    # 4️⃣ Chat Output
	    gr.Markdown("### 💬 Summary Output")
	    chatbot = gr.Chatbot(label="📚 Summary Assistant", type='tuples')
	    # Evaluation output
	    gr.Markdown("### ✨ Summary Quality Assessment (By Gemini)")
	    with gr.Column():
	        overall_quality_bar = gr.HTML(label="Overall Quality", value=create_progress_bar_html(0))

	    # 5️⃣ Download Files
	    gr.Markdown("### 📥 Download Your Summary")
	    with gr.Row():
	        txt_out = gr.File(label="📄 TXT")
	        pdf_out = gr.File(label="📄 PDF")
	        docx_out = gr.File(label="📄 DOCX")

	    def chat_handler(history, text, file, yt, tone, length, language, creativity_level, model_choice, file_formats, extracted_text_state_val):
	        """Handle one click on "Summarize".

	        Exactly one of *text*, *file* or *yt* may be provided. The input is
	        turned into text, summarized, rated, and written to the selected
	        download formats.

	        Returns a 6-tuple matching the click outputs: updated chat history, TXT
	        path, PDF path, DOCX path, the extracted source text, and the quality
	        bar HTML. When no summary is produced (invalid input or failed
	        generation) the file outputs are None, the stored text is left
	        unchanged and the quality bar is reset to 0.
	        """
	        # Work on a copy so the caller's list is never mutated in place.
	        history = list(history or [])
	        text = text or ""
	        yt = (yt or "").strip()
	        file_formats = file_formats or []

	        # Every early exit shows a reset (0/5) quality bar.
	        reset_bar_html = create_progress_bar_html(0)

	        def reply_only(user_msg, bot_msg=""):
	            # Outputs for a turn that produced no summary and no files.
	            return history + [(user_msg, bot_msg)], None, None, None, extracted_text_state_val, reset_bar_html

	        def wrap_rtl_section(section_text):
	            # Wrap a Markdown section in an RTL <div>; its first line becomes an
	            # <h2> when it is a '## ' header, otherwise a <p>.
	            section_lines = section_text.split('\n')
	            header_line = section_lines[0]
	            if header_line.startswith('## '):
	                header_html = f'<h2>{header_line[3:].strip()}</h2>'
	            else:
	                header_html = f'<p>{header_line.strip()}</p>'  # Fallback for non-## headers
	            content_html = "\n".join(section_lines[1:])
	            return f'<div dir="rtl" style="text-align: right; direction: rtl;">{header_html}\n{content_html}</div>'

	        inputs_filled = sum([
	            bool(text.strip()),
	            file is not None,
	            bool(yt)
	        ])

	        if inputs_filled > 1:
	            return reply_only("⚠️ Please use only one input method at a time (Text, File, or YouTube).")

	        settings_line = f"Model: {model_choice}, Tone: {tone}, Length: {length}, Creativity: {creativity_level}, Language: {language}"

	        if yt:
	            # To re-enable: download_youtube_audio(yt), then transcribe_audio() on the result.
	            return reply_only("⚠️ YouTube support is currently disabled on Hugging Face Spaces due to platform restrictions.")
	        elif file is not None:
	            extracted = handle_input_file(file, None) or ""
	            user_msg = f"📄 File: `{file.name}`\n{settings_line}"
	            # handle_input_file reports problems as a "❌ ..." string; show it to
	            # the user instead of sending it to the model as text to summarize.
	            if extracted.startswith("❌"):
	                return reply_only(user_msg, extracted)
	            if not extracted.strip():
	                return reply_only(user_msg, "⚠️ No text could be extracted from this file.")
	        elif text.strip():
	            extracted = text
	            user_msg = f"📝 Text Input: {text[:60]}...\n{settings_line}"
	        else:
	            return reply_only("⚠️ No valid input provided.")

	        valid, msg = check_input_length(extracted)
	        if not valid:
	            return reply_only(user_msg, msg)

	        # Mapping creativity level to (temperature, top_p)
	        creativity_map = {
	            "Low": (0.2, 0.7),
	            "Medium": (0.7, 0.9),
	            "High": (1.0, 1.0)
	        }
	        temp, top_p = creativity_map.get(creativity_level, (0.7, 0.9))

	        generation_settings = genai.types.GenerationConfig(
	            temperature=temp,
	            top_p=top_p,
	        )

	        summary = summarize_text(extracted, tone, length, language, generation_settings, model_choice)

	        # summarize_text reports failures as a "❌ ..." message: show it as the
	        # reply and skip evaluation and file generation.
	        if "❌" in summary:
	            return reply_only(user_msg, summary)

	        # Gemini-based evaluation (Summary Quality Assessment)
	        try:
	            overall_score = evaluate_summary_with_gemini(extracted, summary)
	        except Exception:
	            overall_score = 1  # Default to lowest score on error

	        overall_quality_bar_html = create_progress_bar_html(overall_score)

	        # Saving files based on selected formats
	        txt_path = None
	        pdf_path = None
	        docx_path = None
	        temp_dir = tempfile.gettempdir()

	        if "TXT" in file_formats:
	            txt_path = os.path.join(temp_dir, "summary.txt")
	            save_as_txt(summary, txt_path)

	        if "PDF" in file_formats:
	            pdf_path = os.path.join(temp_dir, "summary.pdf")
	            save_as_pdf(summary, pdf_path)

	        if "DOCX" in file_formats:
	            docx_path = os.path.join(temp_dir, "summary.docx")
	            save_as_docx(summary, docx_path)

	        # Bot response formatting for the Gradio chatbot
	        english_header_start = summary.find("## English Summary")
	        arabic_header_start = summary.find("## Arabic Summary")

	        if english_header_start != -1 and arabic_header_start != -1:
	            # Case 1: both sections present. English stays Markdown, Arabic is
	            # wrapped in an RTL div; English is always shown first.
	            if english_header_start < arabic_header_start:
	                english_section_raw = summary[english_header_start:arabic_header_start].strip()
	                arabic_section_raw = summary[arabic_header_start:].strip()
	            else:
	                arabic_section_raw = summary[arabic_header_start:english_header_start].strip()
	                english_section_raw = summary[english_header_start:].strip()
	            final_summary_content = f"{english_section_raw}\n\n{wrap_rtl_section(arabic_section_raw)}"
	        elif arabic_header_start != -1:
	            # Case 2a: Arabic-only summary with a header; the first line is
	            # treated as the header line.
	            final_summary_content = wrap_rtl_section(summary)
	        elif any('\u0600' <= char <= '\u06FF' for char in summary):
	            # Case 2b: Arabic text without a header, wrap everything in an RTL div
	            final_summary_content = f'<div dir="rtl" style="text-align: right; direction: rtl;">{summary}</div>'
	        else:
	            # Case 3: English only, Markdown renders directly
	            final_summary_content = summary

	        # Final bot response string passed to the Gradio chatbot
	        bot_response = f"""📌 Summary:\n\n{final_summary_content}\n\n📎 Download your files below 👇"""

	        return history + [(user_msg, bot_response)], txt_path, pdf_path, docx_path, extracted, overall_quality_bar_html

	    # Text box clear buttons
	    clear_text.click(fn=lambda: "", inputs=[], outputs=[text_input])
	    clear_yt.click(fn=lambda: "", inputs=[], outputs=[yt_input])

	    # 🔧 Submit Logic
	    # The chatbot is both the history input and the history output, so every
	    # turn (including warnings) is carried over to the next click.
	    submit_btn.click(
	        fn=chat_handler,
	        inputs=[
	            chatbot, text_input, file_input, yt_input, tone, length, language, creativity, model_choice, file_formats,
	            extracted_text_state
	        ],
	        outputs=[
	            chatbot, txt_out, pdf_out, docx_out,
	            extracted_text_state, overall_quality_bar
	        ]
	    )

	# Start the Gradio server only when this file is run as a script.
	if __name__ == "__main__":
	    UI.launch()