"""Gradio app: summarize an uploaded document and generate a quiz from it.

The app reads a PDF, DOCX or TXT upload, detects whether the text is
Vietnamese or something else (treated as English), summarizes it with the
matching model, and then asks a text-to-text model to write quiz questions
about the summary.
"""

import os

import docx
import gradio as gr
from langdetect import DetectorFactory, detect
from langdetect.lang_detect_exception import LangDetectException
from pypdf import PdfReader
from transformers import pipeline

# langdetect is randomized by default; a fixed seed keeps the detected
# language stable for the same document across requests.
DetectorFactory.seed = 0

# ========================
# Constants
# ========================

# Documents shorter than this (after stripping whitespace) are rejected.
MIN_TEXT_LENGTH = 200

# Sentinel returned by extract_text() for unknown file extensions.
UNSUPPORTED_FORMAT_MESSAGE = "Unsupported file format"

QUIZ_PROMPT_VI = (
    "Tạo {num_questions} câu hỏi trắc nghiệm ngắn gọn dựa vào nội dung sau:\n"
    "{summary}\n"
    "Mỗi câu có đáp án đúng."
)

QUIZ_PROMPT_EN = (
    "Create {num_questions} multiple-choice questions based on the "
    "following content:\n"
    "{summary}\n"
    "Each question must include the correct answer."
)

# ========================
# Load models
# ========================

# English summarizer
summarizer_en = pipeline("summarization", model="facebook/bart-large-cnn")

# Vietnamese summarizer
summarizer_vi = pipeline(
    "summarization",
    model="VietAI/vit5-base-vietnamese-summarization",
)

# Quiz generator
quiz_generator = pipeline("text2text-generation", model="google/flan-t5-base")


# ========================
# Read file
# ========================
def extract_text(file):
    """Return the plain text of an uploaded PDF, DOCX or TXT file.

    Args:
        file: Either a filesystem path (``str`` / ``os.PathLike``) or an
            object exposing the path as ``.name``. Both forms are accepted
            because the upload widget may hand over either one depending on
            the Gradio version/configuration.

    Returns:
        The extracted text, or ``UNSUPPORTED_FORMAT_MESSAGE`` when the file
        extension is not ``.pdf``, ``.docx`` or ``.txt`` (case-insensitive).
    """
    if isinstance(file, (str, os.PathLike)):
        path = os.fspath(file)
    else:
        path = file.name
    extension = os.path.splitext(path)[1].lower()

    if extension == ".pdf":
        reader = PdfReader(path)
        # Extract each page once; pages without extractable text are skipped.
        page_texts = (page.extract_text() for page in reader.pages)
        return "".join(
            page_text + "\n" for page_text in page_texts if page_text
        )

    if extension == ".docx":
        document = docx.Document(path)
        return "\n".join(paragraph.text for paragraph in document.paragraphs)

    if extension == ".txt":
        # utf-8-sig drops a leading BOM; undecodable bytes are replaced so a
        # mis-encoded file degrades gracefully instead of crashing the app.
        with open(path, encoding="utf-8-sig", errors="replace") as handle:
            return handle.read()

    return UNSUPPORTED_FORMAT_MESSAGE


# ========================
# Main AI function
# ========================
def summarize_and_quiz(file, num_questions):
    """Summarize the uploaded document and generate quiz questions.

    Args:
        file: The uploaded file as delivered by the Gradio file input, or
            ``None`` when nothing was uploaded.
        num_questions: How many quiz questions to request from the model.

    Returns:
        A ``(summary, quiz)`` tuple of strings. When the input cannot be
        processed, the first element is a user-facing message and the second
        is an empty string.
    """
    if file is None:
        return "Vui lòng tải tệp lên / Please upload a file", ""

    text = extract_text(file)
    if text == UNSUPPORTED_FORMAT_MESSAGE:
        # Report the real problem rather than letting the short sentinel
        # string fall through to the "text too short" check below.
        return "Định dạng tệp không được hỗ trợ / Unsupported file format", ""

    text = text.strip()
    if len(text) < MIN_TEXT_LENGTH:
        return "Văn bản quá ngắn / Text too short", ""

    # --- Detect language ---
    try:
        lang = detect(text)
    except LangDetectException:
        # Raised when the text has no usable features (e.g. only digits or
        # punctuation); fall back to the English pipeline.
        lang = "en"

    # --- Choose summarizer ---
    if lang == "vi":
        summarizer, prompt_template = summarizer_vi, QUIZ_PROMPT_VI
    else:
        summarizer, prompt_template = summarizer_en, QUIZ_PROMPT_EN

    # truncation=True clips documents that exceed the model's input limit
    # instead of failing inside the model.
    summary = summarizer(
        text,
        max_length=200,
        min_length=80,
        do_sample=False,
        truncation=True,
    )[0]["summary_text"]

    # The slider may deliver a float; int() keeps "5.0" out of the prompt.
    prompt = prompt_template.format(
        num_questions=int(num_questions),
        summary=summary,
    )
    quiz = quiz_generator(prompt, max_length=512)[0]["generated_text"]

    return summary, quiz


# ========================
# Gradio Interface
# ========================
interface = gr.Interface(
    fn=summarize_and_quiz,
    inputs=[
        gr.File(label="📄 Upload PDF / DOCX / TXT"),
        gr.Slider(5, 10, value=5, step=1, label="Số câu hỏi Quiz"),
    ],
    outputs=[
        gr.Textbox(label="📌 Tóm tắt nội dung", lines=8),
        gr.Textbox(label="📝 Quiz tự động tạo", lines=12),
    ],
    title="📚 AI TÓM TẮT & TẠO QUIZ (VIỆT + ANH)",
    description=(
        "AI tự động nhận diện ngôn ngữ và tóm tắt tài liệu (Việt/Anh), "
        "sau đó tạo quiz giúp sinh viên ghi nhớ nhanh."
    ),
)

interface.launch()