# UVT / app.py
# Wewoo's picture
# Update app.py
# f0d1550 verified
import gradio as gr
from transformers import pipeline
from langdetect import detect
import docx
from pypdf import PdfReader
# ========================
# Load models
# ========================
# Model identifiers, grouped so each pipeline below can be retargeted in one place.
_EN_SUMMARY_MODEL = "facebook/bart-large-cnn"
_VI_SUMMARY_MODEL = "VietAI/vit5-base-vietnamese-summarization"
_QUIZ_MODEL = "google/flan-t5-base"

# Summarization pipeline used for English text.
summarizer_en = pipeline("summarization", model=_EN_SUMMARY_MODEL)
# Summarization pipeline used for Vietnamese text.
summarizer_vi = pipeline("summarization", model=_VI_SUMMARY_MODEL)
# Text-to-text pipeline that turns a prompt into quiz questions.
quiz_generator = pipeline("text2text-generation", model=_QUIZ_MODEL)
# ========================
# Read file
# ========================
def extract_text(file):
    """Return the plain text of an uploaded PDF, DOCX or TXT file.

    Args:
        file: The uploaded document. Either a filesystem path (``str``) or
            an object that exposes its path as ``.name`` and, optionally,
            a ``read()`` method. ``None`` means nothing was uploaded.

    Returns:
        The extracted text. ``""`` when ``file`` is ``None``, and the
        literal string ``"Unsupported file format"`` when the extension is
        not ``.pdf``, ``.docx`` or ``.txt`` (matched case-insensitively).

    Raises:
        UnicodeDecodeError: If a ``.txt`` file is not valid UTF-8.
    """
    if file is None:
        return ""

    # A plain path has no ``.name``; fall back to the value itself.
    path = str(getattr(file, "name", file))
    lowered = path.lower()

    if lowered.endswith(".pdf"):
        reader = PdfReader(file)
        # Extract each page exactly once; pages yielding no text are skipped.
        page_texts = (page.extract_text() for page in reader.pages)
        return "".join(f"{chunk}\n" for chunk in page_texts if chunk)

    if lowered.endswith(".docx"):
        document = docx.Document(file)
        return "\n".join(para.text for para in document.paragraphs)

    if lowered.endswith(".txt"):
        if hasattr(file, "read"):
            data = file.read()
        else:
            # Path-only input: open the file ourselves and always close it.
            with open(path, "rb") as handle:
                data = handle.read()
        # A handle opened in text mode already yields ``str``.
        return data.decode("utf-8") if isinstance(data, bytes) else data

    return "Unsupported file format"
# ========================
# Main AI function
# ========================
def summarize_and_quiz(file, num_questions):
    """Summarize an uploaded document and generate a quiz from the summary.

    The document language is detected; Vietnamese text goes to
    ``summarizer_vi`` with a Vietnamese quiz prompt, and everything else
    goes to ``summarizer_en`` with an English prompt.

    Args:
        file: The uploaded document as accepted by ``extract_text``, or
            ``None`` when nothing was uploaded.
        num_questions: How many quiz questions to ask for in the prompt.

    Returns:
        A ``(summary, quiz)`` tuple of strings. When the input cannot be
        processed, the first element is a bilingual message and the second
        is ``""``.
    """
    if file is None:
        return "Vui lòng tải lên một tệp / Please upload a file", ""

    text = extract_text(file)
    # extract_text signals an unknown extension with this exact string;
    # report it as such instead of as "text too short".
    if text == "Unsupported file format":
        return "Định dạng tệp không được hỗ trợ / Unsupported file format", ""
    # Whitespace padding must not count towards the minimum length.
    if len(text.strip()) < 200:
        return "Văn bản quá ngắn / Text too short", ""

    # A slider may deliver a float such as 5.0; keep the prompt to a whole number.
    question_count = int(num_questions)

    # --- Detect language and choose the summarizer ---
    is_vietnamese = detect(text) == "vi"
    summarizer = summarizer_vi if is_vietnamese else summarizer_en

    # truncation=True cuts inputs that exceed the model's maximum length
    # instead of letting the call fail on long documents.
    summary = summarizer(
        text,
        max_length=200,
        min_length=80,
        do_sample=False,
        truncation=True,
    )[0]["summary_text"]

    if is_vietnamese:
        prompt = (
            f"\nTạo {question_count} câu hỏi trắc nghiệm ngắn gọn dựa vào nội dung sau:\n"
            f"{summary}\n"
            "Mỗi câu có đáp án đúng.\n"
        )
    else:
        prompt = (
            f"\nCreate {question_count} multiple-choice questions based on the following content:\n"
            f"{summary}\n"
            "Each question must include the correct answer.\n"
        )

    quiz = quiz_generator(prompt, max_length=512)[0]["generated_text"]
    return summary, quiz
# ========================
# Gradio Interface
# ========================
# Input widgets: the document to process and the number of quiz questions.
_upload_box = gr.File(label="📄 Upload PDF / DOCX / TXT")
_question_slider = gr.Slider(5, 10, value=5, step=1, label="Số câu hỏi Quiz")

# Output widgets: one text box for the summary, one for the generated quiz.
_summary_box = gr.Textbox(label="📌 Tóm tắt nội dung", lines=8)
_quiz_box = gr.Textbox(label="📝 Quiz tự động tạo", lines=12)

# Wire the widgets to summarize_and_quiz and start the app.
interface = gr.Interface(
    fn=summarize_and_quiz,
    inputs=[_upload_box, _question_slider],
    outputs=[_summary_box, _quiz_box],
    title="📚 AI TÓM TẮT & TẠO QUIZ (VIỆT + ANH)",
    description="""
AI tự động nhận diện ngôn ngữ và tóm tắt tài liệu (Việt/Anh), sau đó tạo quiz giúp sinh viên ghi nhớ nhanh.
""",
)
interface.launch()