import os
import re
import subprocess
import tempfile

import docx
import gradio as gr
import google.generativeai as genai
import markdown2
import pdfplumber
import yt_dlp
from bs4 import BeautifulSoup
from docx import Document
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from docx.oxml import OxmlElement
from docx.oxml.ns import qn
from docx.shared import Inches
from faster_whisper import WhisperModel
from reportlab.lib.enums import TA_RIGHT
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import cm
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer

# --- API Key Configuration with Fallback ---
# Environment variables that may hold the API key, in order of preference.
API_KEY_ENV_VARS = ["GOOGLE_API_KEY", "GOOGLE_API_KEY_SECONDARY", "ANOTHER_GOOGLE_API_KEY"]

selected_api_key = None
for env_var in API_KEY_ENV_VARS:
    key = os.environ.get(env_var)
    if key:
        selected_api_key = key
        print(f"Using API key from {env_var}")
        break

if selected_api_key:
    genai.configure(api_key=selected_api_key)
else:
    print("Warning: No Google API key found. Please set GOOGLE_API_KEY or other fallback keys.")
    # You might want to raise an error or disable API-dependent features here.
    # For now, it will proceed but API calls will likely fail.

model_2p0 = genai.GenerativeModel("gemini-2.0-flash")
model_2p5 = genai.GenerativeModel("gemini-2.5-flash-preview-05-20")


def check_input_length(text, max_chars=80000):
    """Check that *text* does not exceed *max_chars* characters.

    Returns:
        A ``(ok, message)`` tuple. ``message`` is an empty string when ``ok``
        is True, otherwise a user-facing warning.
    """
    if len(text) > max_chars:
        return False, f"⚠️ Input too long ({len(text)} chars). To avoid API errors, trim it down or split into smaller parts."
    return True, ""


def _first_error_line(error):
    """Return the first line of an exception message, or its class name if the message is empty."""
    lines = str(error).splitlines()
    return lines[0] if lines else type(error).__name__


def summarize_text(text, tone, length, language, generation_settings, model_choice):
    """Summarize *text* with Gemini using the requested tone, length and language.

    Args:
        text: The content to summarize.
        tone: One of "Gen Z", "Professional", "Balanced".
        length: One of "Short", "Medium", "Long".
        language: One of "English", "Arabic", "Both".
        generation_settings: Passed through as ``generation_config``.
        model_choice: "Flash 2.0" or "Flash 2.5" to pin a model; any other
            value tries 2.0 first and falls back to 2.5.

    Returns:
        The summary text, or a message starting with "❌" when generation
        failed. Exceptions from the API are reported in that message rather
        than raised.
    """
    length_prompts = {
        "Short": "Keep it concise. Aim for 3–5 bullet points or a quick overview.",
        "Medium": "Provide a balanced summary with key ideas and some supporting details.",
        "Long": "Write a detailed summary covering all major points with explanations.",
    }
    tone_prompts = {
        "Gen Z": "Summarize this in a chill, Gen Z-friendly tone. Use slang and emojis if needed.",
        "Professional": "Summarize this in a clear, academic, professional tone.",
        "Balanced": "Summarize this in a simple, neutral tone, not too formal or too casual.",
    }
    lang_prompt = {
        "English": "Output the summary in English. USE this clear summary header '## English Summary'",
        "Arabic": "Output the summary directly in Arabic, do not write it in English then translate it. USE this clear summary header '## Arabic Summary'",
        # '{language}' is a literal placeholder shown to the model, not an f-string field.
        "Both": "First write the summary in English, then repeat it in Arabic. USE clear section headers like '## {language} Summary'.",
    }

    # Unknown option values fall back to a default instruction instead of
    # injecting the literal text "None" into the prompt.
    tone_part = tone_prompts.get(tone, tone_prompts["Balanced"])
    length_part = length_prompts.get(length, length_prompts["Medium"])
    language_part = lang_prompt.get(language, lang_prompt["English"])
    prompt = f"{tone_part} {length_part} {language_part}\n\n{text}"

    pinned_models = {"Flash 2.0": model_2p0, "Flash 2.5": model_2p5}
    if model_choice in pinned_models:
        try:
            response = pinned_models[model_choice].generate_content(
                prompt, generation_config=generation_settings
            )
            return response.text
        except Exception as e:  # boundary: surface any API failure as a chat message
            return f"❌ {model_choice} failed.\n\n🛠 Error: `{_first_error_line(e)}`\n\nTry again or choose another model."

    # Auto fallback (2.0 -> 2.5) to save the limited 2.5 quota.
    for model in (model_2p0, model_2p5):
        try:
            return model.generate_content(prompt, generation_config=generation_settings).text
        except Exception:
            continue
    return "❌ Both Gemini models failed. Please try again later or reduce input size."
whisper_model = WhisperModel("base")

# Message returned by handle_input_file for extensions it does not recognise.
_UNSUPPORTED_FORMAT_MSG = "❌ Unsupported file format!"


def extract_text_from_pdf(file_path):
    """Return the text of every page of a PDF joined by newlines; pages without text are skipped."""
    with pdfplumber.open(file_path) as pdf:
        # Extract each page once; the original extracted every page twice.
        page_texts = (page.extract_text() for page in pdf.pages)
        return "\n".join(page_text for page_text in page_texts if page_text)


def extract_text_from_docx(file_path):
    """Return the text of all paragraphs of a DOCX file joined by newlines."""
    doc = docx.Document(file_path)
    return "\n".join(para.text for para in doc.paragraphs)


def transcribe_audio(file_path):
    """Transcribe an audio or video file with the module-level Whisper model."""
    segments, _ = whisper_model.transcribe(file_path)
    return " ".join(segment.text for segment in segments)


def download_youtube_audio(youtube_url):
    """Download a YouTube video's audio as MP3 into the temp directory and return its path.

    The output name is fixed ("yt_audio.mp3"), so successive downloads
    overwrite each other.
    """
    ydl_opts = {
        "format": "bestaudio/best",
        "outtmpl": os.path.join(tempfile.gettempdir(), "yt_audio.%(ext)s"),
        "postprocessors": [{
            "key": "FFmpegExtractAudio",
            "preferredcodec": "mp3",
            "preferredquality": "192"
        }],
        "quiet": True
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([youtube_url])
    return os.path.join(tempfile.gettempdir(), "yt_audio.mp3")


def handle_input_file(file, _):
    """Dispatch an uploaded file to the extractor matching its extension.

    Args:
        file: An object with a ``name`` attribute holding the path, or the
            path itself.
        _: Unused; kept for interface compatibility.

    Returns:
        The extracted text, or "❌ Unsupported file format!" for unknown
        extensions.
    """
    filepath = getattr(file, "name", file)
    ext = os.path.splitext(filepath)[1].lstrip(".").lower()
    if ext == "pdf":
        return extract_text_from_pdf(filepath)
    elif ext == "docx":
        return extract_text_from_docx(filepath)
    elif ext in ["mp3", "wav", "m4a", "mp4", "webm", "mov"]:
        return transcribe_audio(filepath)
    else:
        return _UNSUPPORTED_FORMAT_MSG


def save_as_txt(summary, filepath):
    """Write the summary to *filepath* as UTF-8 text."""
    with open(filepath, "w", encoding="utf-8") as f:
        f.write(summary)


def _contains_arabic(text):
    """Return True if *text* contains a character in the U+0600–U+06FF range."""
    if text is None:
        return False
    return bool(re.search(r'[\u0600-\u06FF]', text))


def _set_paragraph_direction(para, rtl):
    """Mark a python-docx paragraph (and its existing runs) as RTL or LTR.

    Call this after the paragraph's runs have been added: the run-level
    flag is only applied to runs that already exist.
    """
    pPr = para._element.get_or_add_pPr()
    bidi_tag = pPr.find(qn('w:bidi'))

    # The original set w:textDirection to 'rl' for RTL paragraphs. That
    # element appears to control vertical text flow rather than bidi
    # direction (NOTE(review): confirm against the WordprocessingML spec),
    # so it is no longer written and any existing one is removed.
    text_direction = pPr.find(qn('w:textDirection'))
    if text_direction is not None:
        pPr.remove(text_direction)

    if rtl:
        if bidi_tag is None:
            bidi_tag = OxmlElement('w:bidi')
            # Appended before the alignment is set so that w:jc lands after
            # w:bidi; presumably Word expects this child order — verify.
            pPr.append(bidi_tag)
        bidi_tag.set(qn('w:val'), "1")
        para.alignment = WD_PARAGRAPH_ALIGNMENT.RIGHT
        for run in para.runs:
            run.font.rtl = True
    else:
        if bidi_tag is not None:
            bidi_tag.set(qn('w:val'), "0")
        para.alignment = WD_PARAGRAPH_ALIGNMENT.LEFT


def _add_inline_runs(para, node, fallback_text=""):
    """Copy the direct children of an HTML node into *para* as runs.

    <strong>/<b> become bold and <em>/<i> italic. Any other inline tag is
    added as plain text (the original dropped it). Nested lists are skipped
    because their items are emitted as separate paragraphs by the caller.
    """
    for child in node.contents:
        if child.name is None:  # NavigableString (plain text)
            para.add_run(str(child))
        elif child.name in ("ul", "ol"):
            continue
        else:
            run = para.add_run(child.get_text())
            if child.name in ("strong", "b"):
                run.bold = True
            elif child.name in ("em", "i"):
                run.italic = True
    if not para.text.strip() and fallback_text:
        para.add_run(fallback_text)


def _add_list_items(doc, list_node, style_name, force_rtl):
    """Add one paragraph per <li> under *list_node*, using *style_name*."""
    for li in list_node.find_all("li"):
        item_text = li.get_text().strip()
        para = doc.add_paragraph(style=style_name)
        _add_inline_runs(para, li, item_text)
        _set_paragraph_direction(para, force_rtl or _contains_arabic(item_text))


def _add_rtl_div(doc, div):
    """Render the children of an explicit <div dir="rtl">, forcing RTL on all of them."""
    for child in div.children:
        if child.name is None:  # text node directly inside the div
            text = str(child).strip()
            if text:
                _set_paragraph_direction(doc.add_paragraph(text), True)
            continue

        child_text = child.get_text().strip()
        if not child_text:
            continue  # skip empty child elements

        if child.name == "h2":
            para = doc.add_heading(child_text, level=2)
        elif child.name == "ul":
            _add_list_items(doc, child, "List Bullet", True)
            continue
        elif child.name == "ol":
            _add_list_items(doc, child, "List Number", True)
            continue
        else:  # <p> and anything unrecognised are treated as a paragraph
            para = doc.add_paragraph()
            _add_inline_runs(para, child, child_text)
        _set_paragraph_direction(para, True)


def save_as_docx(summary, filepath):
    """Render the Markdown summary into a DOCX file at *filepath*.

    The Markdown is converted to HTML and its top-level elements are mapped
    to headings, list paragraphs and plain paragraphs. Everything after an
    "Arabic Summary" <h2> (until an "English Summary" <h2>) and any element
    containing Arabic characters is marked right-to-left.
    """
    doc = Document()
    html = markdown2.markdown(summary, extras=["markdown-in-html", "tables"])
    soup = BeautifulSoup(html, "html.parser")

    heading_levels = {f"h{level}": level for level in range(1, 7)}
    in_arabic_section = False

    for element in soup.contents:
        if element.name is None:  # text node between elements
            text = str(element).strip()
            if text:
                para = doc.add_paragraph(text)
                _set_paragraph_direction(para, in_arabic_section or _contains_arabic(text))
            continue

        text = element.get_text().strip()

        # Section state: an <h2> naming a language switches the default direction.
        if element.name == "h2":
            if "Arabic Summary" in text:
                in_arabic_section = True
            elif "English Summary" in text:
                in_arabic_section = False

        rtl = in_arabic_section or _contains_arabic(text)

        if element.name in heading_levels:
            para = doc.add_heading(text, level=heading_levels[element.name])
            _set_paragraph_direction(para, rtl)
        elif element.name == "ul":
            _add_list_items(doc, element, "List Bullet", in_arabic_section)
        elif element.name == "ol":
            _add_list_items(doc, element, "List Number", in_arabic_section)
        elif element.name == "p":
            para = doc.add_paragraph()
            _add_inline_runs(para, element, text)
            _set_paragraph_direction(para, rtl)
        elif element.name == "div" and element.attrs.get("dir") == "rtl":
            _add_rtl_div(doc, element)
        elif text:  # fallback for other top-level elements (e.g. tables): flattened text
            para = doc.add_paragraph(text)
            _set_paragraph_direction(para, rtl)

    doc.save(filepath)


def save_as_pdf(summary, filepath):
    """Render the summary into an A4 PDF at *filepath*, one paragraph per blank-line-separated block.

    Blocks containing Arabic are right-aligned; blocks starting with "##"
    use the Heading2 style with the markers removed.
    """
    from xml.sax.saxutils import escape  # local import: only this function needs it

    doc = SimpleDocTemplate(filepath, pagesize=A4, rightMargin=2*cm, leftMargin=2*cm, topMargin=2*cm, bottomMargin=2*cm)
    styles = getSampleStyleSheet()
    arabic_style = ParagraphStyle('Arabic', parent=styles['Normal'], alignment=TA_RIGHT)
    story = []

    for raw_block in summary.split("\n\n"):
        block = raw_block.strip()
        if not block:
            continue

        is_heading = block.startswith("##")
        if is_heading:
            block = block.lstrip("#").strip()

        if "Arabic Summary" in block or _contains_arabic(block):
            para_style = arabic_style
        elif is_heading:
            para_style = styles["Heading2"]
        else:
            para_style = styles["Normal"]

        # Paragraph parses its text as XML-like markup, so '<', '>' and '&'
        # coming from the model must be escaped.
        story.append(Paragraph(escape(block.replace("**", "")), para_style))
        story.append(Spacer(1, 12))

    doc.build(story)


def _ask_for_score(model, prompt):
    """Ask *model* for a 1–5 rating and return it; return 1 if the call fails or no rating is found."""
    try:
        reply = model.generate_content(prompt).text
    except Exception:  # best effort: an API failure counts as the lowest score
        return 1
    # Take the first digit in range so replies like "Score: 4" still parse.
    match = re.search(r"[1-5]", reply or "")
    return int(match.group()) if match else 1


def evaluate_summary_with_gemini(original_text, generated_summary):
    """Rate a summary with Gemini (Flash 2.0) and return an overall score from 1 to 5.

    Two ratings are requested (faithfulness to the original, and
    coherence/tone) and averaged, rounding halves up.
    """
    eval_model = model_2p0

    # 1. Faithfulness/Relevance Check
    faithfulness_prompt = f"""
    Original Text: {original_text}
    Generated Summary: {generated_summary}
    Provide a 'Faithfulness Score' from 1 to 5, where 5 is perfectly faithful and 1 is very unfaithful.
    Output only the score as a single digit.
    """
    # 2. Readability Check
    readability_prompt = f"""
    Summary: {generated_summary}
    Provide a quality rating from 1 to 5 for coherence and tone, where 5 is excellent and 1 is poor.
    Output only the score as a single digit.
    """

    faithfulness_score = _ask_for_score(eval_model, faithfulness_prompt)
    readability_score = _ask_for_score(eval_model, readability_prompt)

    # Integer half-up average; round() would send both 3.5 and 4.5 to 4.
    return (faithfulness_score + readability_score + 1) // 2


def create_progress_bar_html(score, max_score=5):
    """Return an HTML progress bar for *score* out of *max_score*.

    The fill colour runs from red (0%) through yellow (50%) to green (100%).
    The percentage is clamped to 0–100 and a zero *max_score* yields 0%.
    """
    percentage = (score / max_score) * 100 if max_score else 0
    percentage = max(0, min(100, percentage))

    if percentage <= 50:
        # Linear interpolation from Red (255,0,0) to Yellow (255,255,0)
        r = 255
        g = int(255 * (percentage / 50))
        b = 0
    else:
        # Linear interpolation from Yellow (255,255,0) to Green (0,255,0)
        r = int(255 * ((100 - percentage) / 50))
        g = 255
        b = 0
    color = f"rgb({r},{g},{b})"

    # NOTE(review): the original markup was not recoverable from the reviewed
    # source (only "{score}/{max_score}" and a "dark grey empty bar" comment
    # survived); this is a reconstruction — confirm the styling.
    return f"""
<div style="position: relative; width: 100%; height: 25px; background-color: #444; border-radius: 5px; overflow: hidden;">
    <div style="width: {percentage}%; height: 100%; background-color: {color};"></div>
    <div style="position: absolute; top: 0; left: 0; width: 100%; line-height: 25px; text-align: center; color: white; font-weight: bold;">
        {score}/{max_score}
    </div>
</div>
"""


def _wrap_rtl_section(section):
    """Wrap a Markdown section in an RTL <div>, turning its first line into an HTML header.

    NOTE(review): the tag literals (<div dir="rtl">, <h2>, <p>) are
    reconstructed; the reviewed source had them stripped — confirm.
    """
    lines = section.split('\n')
    header_line = lines[0]
    content = "\n".join(lines[1:])
    if header_line.startswith('## '):
        header_html = f'<h2>{header_line[3:].strip()}</h2>'
    else:
        header_html = f'<p>{header_line.strip()}</p>'  # fallback for non-## headers
    return f'<div dir="rtl">{header_html}\n{content}</div>'


def _format_summary_for_chat(summary):
    """Return the summary with its Arabic part wrapped in an RTL <div> for the chatbot."""
    english_start = summary.find("## English Summary")
    arabic_start = summary.find("## Arabic Summary")

    if english_start != -1 and arabic_start != -1:
        # Both sections present: English is shown first whatever the order was.
        if english_start < arabic_start:
            english_section = summary[english_start:arabic_start].strip()
            arabic_section = summary[arabic_start:].strip()
        else:
            arabic_section = summary[arabic_start:english_start].strip()
            english_section = summary[english_start:].strip()
        return f"{english_section}\n\n{_wrap_rtl_section(arabic_section)}"

    if arabic_start != -1:
        # Arabic only: anchor on the header itself rather than assuming it is
        # the first line, so text preceding it is not mistaken for the header.
        preamble = summary[:arabic_start].strip()
        wrapped = _wrap_rtl_section(summary[arabic_start:].strip())
        if preamble:
            return f'<div dir="rtl">{preamble}</div>\n\n{wrapped}'
        return wrapped

    if _contains_arabic(summary):
        # No header but Arabic content: wrap everything.
        return f'<div dir="rtl">{summary}</div>'

    return summary  # English only: Markdown renders directly


def _export_summary_files(summary, file_formats):
    """Write the summary in each selected format and return ``(txt_path, pdf_path, docx_path)``.

    Files go into a fresh temporary directory per call so that concurrent
    requests do not overwrite each other. Unselected formats yield None.
    """
    selected = file_formats or []
    txt_path = pdf_path = docx_path = None
    if not selected:
        return txt_path, pdf_path, docx_path

    out_dir = tempfile.mkdtemp(prefix="summary_")
    if "TXT" in selected:
        txt_path = os.path.join(out_dir, "summary.txt")
        save_as_txt(summary, txt_path)
    if "PDF" in selected:
        pdf_path = os.path.join(out_dir, "summary.pdf")
        save_as_pdf(summary, pdf_path)
    if "DOCX" in selected:
        docx_path = os.path.join(out_dir, "summary.docx")
        save_as_docx(summary, docx_path)
    return txt_path, pdf_path, docx_path


with gr.Blocks() as UI:
    gr.Markdown("## 🤖💬 Multi-Input Academic Summarizer")

    chat_state = gr.State([])  # kept for compatibility; history is read from the chatbot itself
    extracted_text_state = gr.State("")  # stored for re-evaluation

    # 1️⃣ Input Block
    gr.Markdown("### 📂 Input Options")
    with gr.Row():
        with gr.Column(scale=1):
            text_input = gr.Textbox(label="📝 Paste Text", lines=4, placeholder="Paste your text here...")
            clear_text = gr.Button("🧹 Clear Text", scale=0)  # scale=0 for minimal button space
        with gr.Column(scale=1):
            file_input = gr.File(label="📎 Upload File")
        with gr.Column(scale=1):
            yt_input = gr.Textbox(label="🔗 YouTube Link", placeholder="http://youtube.com/watch?v=...")
            clear_yt = gr.Button("❌ Clear YouTube Link", scale=0)

    # 2️⃣ Settings Block
    gr.Markdown("### ⚙️ Summary Settings")
    with gr.Row():
        tone = gr.Radio(["Gen Z", "Balanced", "Professional"], label="🎭 Tone", value="Balanced")
        length = gr.Radio(["Short", "Medium", "Long"], label="📏 Length", value="Medium")
        creativity = gr.Radio(["Low", "Medium", "High"], label="🎨 Creativity Level", value="Medium")
    with gr.Row():
        language = gr.Radio(["English", "Arabic", "Both"], label="🌐 Output Language", value="English")
        model_choice = gr.Radio(["Auto (2.0 → 2.5)", "Flash 2.0", "Flash 2.5"], label="🤖 Gemini Model", value="Auto (2.0 → 2.5)")
        file_formats = gr.CheckboxGroup(
            choices=["TXT", "PDF", "DOCX"],
            value=["TXT", "PDF", "DOCX"],  # Default: all selected
            label="📁 Output Formats",
            interactive=True
        )

    # 3️⃣ Submit Button
    submit_btn = gr.Button("🚀 Summarize")

    # 4️⃣ Chat Output
    gr.Markdown("### 💬 Summary Output")
    chatbot = gr.Chatbot(label="📚 Summary Assistant", type='tuples')

    # Evaluation Output
    gr.Markdown("### ✨ Summary Quality Assessment (By Gemini)")
    with gr.Column():
        overall_quality_bar = gr.HTML(label="Overall Quality", value=create_progress_bar_html(0))

    # 5️⃣ Download Files
    gr.Markdown("### 📥 Download Your Summary")
    with gr.Row():
        txt_out = gr.File(label="📄 TXT")
        pdf_out = gr.File(label="📄 PDF")
        docx_out = gr.File(label="📄 DOCX")

    def chat_handler(history, text, file, yt, tone, length, language, creativity_level, model_choice, file_formats, extracted_text_state_val):
        """Handle a submit: extract the input, summarize, score and export it.

        Returns:
            A 6-tuple matching the click outputs: updated chat history, TXT
            path, PDF path, DOCX path, the extracted source text (state) and
            the quality-bar HTML. On a rejected request the paths are None,
            the state is left unchanged and the bar is reset to 0.
        """
        history = list(history or [])
        text = text or ""
        yt = (yt or "").strip()
        reset_bar_html = create_progress_bar_html(0)

        def reject(user_msg, bot_msg):
            # Early exit that keeps the stored text and shows an empty bar
            # instead of overwriting both outputs with None.
            return history + [(user_msg, bot_msg)], None, None, None, extracted_text_state_val, reset_bar_html

        inputs_filled = sum([bool(text.strip()), file is not None, bool(yt)])
        if inputs_filled > 1:
            return reject("⚠️ Please use only one input method at a time (Text, File, or YouTube).", "")

        settings_line = f"Model: {model_choice}, Tone: {tone}, Length: {length}, Creativity: {creativity_level}, Language: {language}"

        if yt:
            # download_youtube_audio + transcribe_audio would be used here once re-enabled.
            return reject("⚠️ YouTube support is currently disabled on Hugging Face Spaces due to platform restrictions.", "")
        elif file is not None:
            user_msg = f"📄 File: `{getattr(file, 'name', file)}`\n{settings_line}"
            try:
                extracted = handle_input_file(file, None)
            except Exception as err:  # boundary: report unreadable files in the chat
                return reject(user_msg, f"❌ Could not read the file: `{err}`")
            if extracted == _UNSUPPORTED_FORMAT_MSG:
                # Do not send the error message to the model as if it were content.
                return reject(user_msg, extracted)
        elif text.strip():
            extracted = text
            user_msg = f"📝 Text Input: {text[:60]}...\n{settings_line}"
        else:
            return reject("⚠️ No valid input provided.", "")

        if not extracted.strip():
            return reject(user_msg, "⚠️ No text could be extracted from the input.")

        valid, msg = check_input_length(extracted)
        if not valid:
            return reject(user_msg, msg)

        # Mapping creativity level to (temperature, top_p)
        creativity_map = {
            "Low": (0.2, 0.7),
            "Medium": (0.7, 0.9),
            "High": (1.0, 1.0)
        }
        temp, top_p = creativity_map.get(creativity_level, (0.7, 0.9))
        generation_settings = genai.types.GenerationConfig(
            temperature=temp,
            top_p=top_p,
        )

        summary = summarize_text(extracted, tone, length, language, generation_settings, model_choice)

        # Failure messages from summarize_text start with "❌"; a successful
        # summary may legitimately contain that emoji further in.
        if summary.startswith("❌"):
            history.append((user_msg, summary))
            return history, None, None, None, extracted, reset_bar_html

        # Gemini-based evaluation (Summary Quality Assessment)
        try:
            overall_score = evaluate_summary_with_gemini(extracted, summary)
        except Exception:
            overall_score = 1  # default to the lowest score on error
        overall_quality_bar_html = create_progress_bar_html(overall_score)

        txt_path, pdf_path, docx_path = _export_summary_files(summary, file_formats)

        final_summary_content = _format_summary_for_chat(summary)
        bot_response = f"""**📌 Summary:**\n\n{final_summary_content}\n\n📎 Download your files below 👇"""
        history.append((user_msg, bot_response))

        return history, txt_path, pdf_path, docx_path, extracted, overall_quality_bar_html

    # Text box clear buttons
    clear_text.click(fn=lambda: "", inputs=[], outputs=[text_input])
    clear_yt.click(fn=lambda: "", inputs=[], outputs=[yt_input])

    # 🔧 Submit Logic
    submit_btn.click(
        fn=chat_handler,
        inputs=[
            # The chatbot's own value is the running history; the original read
            # chat_state, which was never written, so each submit started empty.
            chatbot, text_input, file_input, yt_input, tone, length,
            language, creativity, model_choice, file_formats, extracted_text_state
        ],
        outputs=[
            chatbot, txt_out, pdf_out, docx_out,
            extracted_text_state,
            overall_quality_bar
        ]
    )

if __name__ == "__main__":
    UI.launch()