| !pip install gradio
|
| !pip install easyocr
|
| !pip install pdfplumber
|
|
|
| from transformers import pipeline
|
| import gradio as gr
|
| import easyocr
|
| import pdfplumber
|
| import random
|
|
|
|
|
# Heading and blurb rendered at the top of the app.
title = "<h1>產生英文題目</h1>"
description = """這是一個利用hugging face 產生英文題目的小專案"""

# Stand-alone article input box. Nothing in the visible code refers to this
# name (the tabbed layout builds its own ``text_input``); it is kept so any
# other user of ``textbox`` keeps working.
textbox = gr.Textbox(
    label="請輸入英文文章:",
    placeholder="While lily is setting...",
    lines=5,
)

# Root container of the UI; filled in by the ``with demo:`` section.
demo = gr.Blocks()

# Shared text2text pipeline, created once at module load and reused by every
# question/answer generation call.
question_generator = pipeline(
    "text2text-generation",
    model="valhalla/t5-base-qg-hl",
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def question_generator_with_answer(context, max_attempts=30):
    """Build one multiple-choice question from ``context``.

    The module-level ``question_generator`` pipeline is sampled once for the
    question text, once for the correct answer, and then repeatedly for
    distractors until three distinct wrong answers have been collected or
    ``max_attempts`` sampling calls have been made.

    Args:
        context: Source passage the question is generated from.
        max_attempts: Upper bound on distractor sampling calls. Sampling is
            random, so without a bound a passage that keeps producing the
            same (or a rejected) output would never let the loop finish.

    Returns:
        A dict with the keys ``"question"``, ``"choices"`` (a shuffled list
        holding the correct answer plus up to three distractors) and
        ``"correct_answer"``.
    """
    question_data = question_generator(
        f"question:{context}",
        max_length=100, do_sample=True, temperature=0.8, top_p=0.9,
    )
    question = question_data[0]['generated_text']

    answer_data = question_generator(
        f"answer:{context}",
        max_length=100, do_sample=True, temperature=1, top_p=0.9,
    )
    correct_answer = answer_data[0]['generated_text']

    # A set keeps the distractors distinct from one another.
    wrong_answers = set()
    attempts = 0
    while len(wrong_answers) < 3 and attempts < max_attempts:
        attempts += 1
        wrong_data = question_generator(
            f"answer: {context}",
            max_length=50, do_sample=True, temperature=1.0, top_p=0.8,
        )
        wrong_answer = wrong_data[0]['generated_text']
        # Reject the correct answer itself and anything that looks like a
        # question rather than an answer.
        if wrong_answer != correct_answer and "?" not in wrong_answer:
            wrong_answers.add(wrong_answer)

    choices = list(wrong_answers) + [correct_answer]
    random.shuffle(choices)

    return {
        "question": question,
        "choices": choices,
        "correct_answer": correct_answer,
    }
|
|
|
def format_question_output(context):
    """Generate four multiple-choice questions and render them as text.

    Each question comes from its own ``question_generator_with_answer`` call
    on the same ``context``. Choices are lettered from ``A`` in the order
    they are returned; the correct answer is not marked in the output.

    Returns:
        The four rendered entries joined by newlines.
    """

    def render(number, quiz):
        # One entry: numbered question line, blank line, lettered choices.
        stem = f"{quiz['question']}\n"
        options = []
        for offset, choice in enumerate(quiz['choices']):
            options.append(f"{chr(65 + offset)}. {choice}")
        body = "\n".join(options)
        return f"\nQ{number}.{stem}\n{body}\n"

    entries = [
        render(number, question_generator_with_answer(context))
        for number in range(1, 5)
    ]
    return "\n".join(entries)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def extract_text_from_pdf(pdf_path):
    """Read the text of an uploaded PDF and generate questions from it.

    Args:
        pdf_path: The uploaded file, given either as a filesystem path or as
            an object that exposes the path through a ``name`` attribute.

    Returns:
        The formatted question text produced by ``format_question_output``,
        or an error message when the PDF yields no text.
    """
    # Accept both a plain path and a file wrapper carrying ``.name``.
    path = getattr(pdf_path, "name", pdf_path)

    with pdfplumber.open(path) as pdf:
        # ``extract_text()`` may yield None for a page without a text layer;
        # treat that as an empty page instead of failing on ``None + "\n"``.
        page_texts = [(page.extract_text() or "") + "\n" for page in pdf.pages]
    text = "".join(page_texts)

    # Nothing usable (e.g. a scanned PDF): report it rather than asking the
    # model to generate questions from an empty passage.
    if not text.strip():
        return "錯誤:PDF 沒有可擷取的文字,請確認檔案內容。"
    return format_question_output(text)
|
|
|
|
|
|
|
|
|
# Lazily created EasyOCR reader, reused across calls so the recognition
# models are not loaded again for every image.
_ocr_reader = None


def _get_ocr_reader():
    """Return the shared EasyOCR reader, creating it on first use."""
    global _ocr_reader
    if _ocr_reader is None:
        _ocr_reader = easyocr.Reader(['en', 'ch_tra'])
    return _ocr_reader


def OCR(photo):
    """Recognise the text in ``photo`` and return it as a single string.

    Args:
        photo: The image to read, passed straight to ``Reader.readtext``.

    Returns:
        The recognised text fragments joined with single spaces, or an empty
        string when nothing was recognised.
    """
    results = _get_ocr_reader().readtext(photo)
    # Each result is a (bbox, text, prob) triple; only the text is used.
    # Joining with a space keeps words from neighbouring fragments apart.
    return " ".join(text for _bbox, text, _prob in results)
|
|
|
|
|
|
|
def OCR_gen(text):
    """Turn confirmed OCR text into formatted questions.

    Returns an error message instead when ``text`` is empty or contains only
    whitespace; otherwise returns ``format_question_output(text)``.
    """
    has_content = bool(text.strip())
    if has_content:
        return format_question_output(text)
    return "錯誤:OCR 沒有輸出任何可用的文字,請重新檢查圖片內容。"
|
|
|
|
|
# Build the three-tab UI and connect each button to its handler.
# NOTE(review): the source lost its indentation, so the row/column nesting
# below is a reconstruction - confirm the layout against the running app.
with demo:
    gr.Markdown(title)
    gr.Markdown(description)

    with gr.Tabs():
        # Tab 1: questions from pasted text.
        with gr.TabItem("輸入文字"):
            with gr.Row():
                text_input = gr.Textbox(
                    label="請輸入英文文章:",
                    placeholder="While lily is setting...",
                    lines=5,
                )
                with gr.Column():
                    text_output = gr.Textbox(label="題目")
            text_button = gr.Button("產生題目")

        # Tab 2: questions from an uploaded PDF.
        with gr.TabItem("PDF文件辨識"):
            with gr.Row():
                PDF_input = gr.File(label="請上傳PDF文件")
                with gr.Column():
                    PDF_output = gr.Textbox()
            PDF_button = gr.Button("產生題目")

        # Tab 3: OCR an image, let the user review the text, then generate.
        with gr.TabItem("圖片辨識"):
            with gr.Row():
                image_input = gr.Image()

                with gr.Column():
                    img_tem = gr.Textbox(
                        placeholder="請確認辨識結果",
                        label="辨識結果",
                    )
                    img_button = gr.Button("開始解析")
                    image_button = gr.Button("產生題目")

                with gr.Column():
                    image_output = gr.Textbox(label="題目")

    def validate_and_generate(text):
        """Generate questions from the typed article, or ask for input."""
        if text.strip():
            return format_question_output(text)
        return "請輸入文章以產生題目"

    text_button.click(
        validate_and_generate,
        inputs=text_input,
        outputs=text_output,
    )

    def test_PDF(file):
        """Generate questions from the uploaded PDF, or ask for a file."""
        return extract_text_from_pdf(file) if file else "請上傳PDF文件以產生題目"

    PDF_button.click(test_PDF, inputs=PDF_input, outputs=PDF_output)

    def test_image(image):
        """Run OCR on the uploaded image, or ask for an image."""
        if image is not None:
            return OCR(image)
        return "請上傳圖片以產生題目"

    img_button.click(test_image, inputs=image_input, outputs=img_tem)

    def test_finished(text):
        """Generate questions from the reviewed OCR text.

        The "please upload an image" prompt that ``test_image`` writes into
        the same textbox is treated like empty input.
        """
        if text.strip() and text != "請上傳圖片以產生題目":
            return OCR_gen(text)
        return "請確認文章已經輸入"

    image_button.click(test_finished, inputs=img_tem, outputs=image_output)

demo.launch()
|
|
|