Spaces:

ManB2207540
/

demo-question-generation

Sleeping

App Files Files Community

Add t5-small model

by tbtminh - opened Jul 20

base: refs/heads/main

←

from: refs/pr/1

Discussion Files changed

+25

-6

Files changed (1) hide show

app.py +25 -6

app.py CHANGED Viewed

@@ -1,11 +1,11 @@
 import gradio as gr
 import spacy
-from transformers import ProphetNetTokenizer, ProphetNetForConditionalGeneration, pipeline
 import torch
 import time
 import re
-import os # Đảm bảo bạn đã import os
 # Tải mô hình spaCy
 if not spacy.util.is_package("en_core_web_md"):
@@ -15,7 +15,8 @@ nlp = spacy.load("en_core_web_md")
 print("✅ Đã tải/nạp mô hình spaCy.")
 MODEL_PATHS = {  # maps each model_name accepted by get_pipeline to the model path handed to the loader
     "prophetnet2": "ManB2207540/prophetnet_SQuAD_1.1-2epoch_break",
-    "prophetnet tieu chuan": "microsoft/prophetnet-large-uncased-squad-qg"
 }
 def load_pipeline(model_path):
@@ -30,17 +31,31 @@ def load_pipeline(model_path):
         device=0 if torch.cuda.is_available() else -1
     )
 pipeline_cache = {}
 def get_pipeline(model_name):  # Return the pipeline for model_name, building and caching it on first request.
     model_path = MODEL_PATHS[model_name]  # raises KeyError when model_name is not a key of MODEL_PATHS
     if model_name not in pipeline_cache:  # only build a pipeline the first time a name is requested
-        pipeline_cache[model_name] = load_pipeline(model_path)
     return pipeline_cache[model_name]  # later calls with the same name reuse the cached object
 # Tự viết hàm capitalize thông minh
 def smart_capitalize(text):
     # Giữ nguyên cách viết hoa phần còn lại, chỉ viết hoa chữ đầu nếu cần
     text = text.strip()
@@ -54,7 +69,11 @@ def smart_capitalize(text):
 def generate_question(context, answer, model_name):
     pipe = get_pipeline(model_name)
     tokenizer = pipe.tokenizer
-    prompt = f"context: {context} answer: {answer}"
     # Cắt prompt nếu vượt quá giới hạn token
     encoded = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)

 import gradio as gr
 import spacy
+from transformers import ProphetNetTokenizer, ProphetNetForConditionalGeneration, pipeline, T5Tokenizer, T5ForConditionalGeneration
 import torch
 import time
 import re
+import os
 # Tải mô hình spaCy
 if not spacy.util.is_package("en_core_web_md"):
 print("✅ Đã tải/nạp mô hình spaCy.")
 MODEL_PATHS = {  # maps each model_name accepted by get_pipeline to the model path handed to the loader
     "prophetnet2": "ManB2207540/prophetnet_SQuAD_1.1-2epoch_break",
+    "prophetnet tieu chuan": "microsoft/prophetnet-large-uncased-squad-qg",
+    "t5-small-finetuned": "tbtminh/t5-small-qg-finetuned"  # this key is special-cased by name in get_pipeline and generate_question
 }
 def load_pipeline(model_path):
         device=0 if torch.cuda.is_available() else -1
     )
+def load_t5_pipeline(model_path):  # Build and return a "text2text-generation" pipeline for the T5 checkpoint at model_path.
+    tokenizer = T5Tokenizer.from_pretrained(model_path)  # tokenizer and model are both loaded from the same model_path
+    model = T5ForConditionalGeneration.from_pretrained(model_path)
+    return pipeline(  # the configured pipeline object is the function's return value
+        "text2text-generation",
+        model=model,
+        tokenizer=tokenizer,
+        max_length=256,  # length limit handed to the pipeline
+        num_return_sequences=1,  # request a single generated sequence per call
+        device=0 if torch.cuda.is_available() else -1  # device index 0 when CUDA is available, otherwise -1 (CPU per the transformers pipeline docs)
+    )
 pipeline_cache = {}
 def get_pipeline(model_name):  # Return the pipeline for model_name, building and caching it on first request.
     model_path = MODEL_PATHS[model_name]  # raises KeyError when model_name is not a key of MODEL_PATHS
     if model_name not in pipeline_cache:  # only build a pipeline the first time a name is requested
+        if model_name == "t5-small-finetuned":  # the T5 entry is matched by its exact key and uses the T5-specific loader
+            pipeline_cache[model_name] = load_t5_pipeline(model_path)
+        else:  # every other entry goes through load_pipeline
+            pipeline_cache[model_name] = load_pipeline(model_path)
     return pipeline_cache[model_name]  # later calls with the same name reuse the cached object
 # Tự viết hàm capitalize thông minh
 def smart_capitalize(text):
     # Giữ nguyên cách viết hoa phần còn lại, chỉ viết hoa chữ đầu nếu cần
     text = text.strip()
 def generate_question(context, answer, model_name):
     pipe = get_pipeline(model_name)
     tokenizer = pipe.tokenizer
+    if model_name == "t5-small-finetuned":
+        prompt = f"generate question: context: {context} answer: {answer}"
+    else:
+        prompt = f"context: {context} answer: {answer}"
     # Cắt prompt nếu vượt quá giới hạn token
     encoded = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)