Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import pandas as pd | |
| import re | |
| import os | |
| from transformers import AutoTokenizer, AutoModelForSeq2SeqLM | |
# The tokenizer and the seq2seq model come from the same checkpoint and are
# loaded once, when the module is imported.
_CHECKPOINT = "potsawee/t5-large-generation-squad-QuestionAnswer"
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
model = AutoModelForSeq2SeqLM.from_pretrained(_CHECKPOINT)
def generate_question_answer_pairs(input_text):
    """Generate one question/answer pair per sentence of ``input_text``.

    The text is split after every ``.``, ``!`` or ``?``. Each non-blank
    sentence is encoded with the module-level ``tokenizer``, passed to
    ``model.generate`` and decoded; the decoded string is split at its first
    ``?`` into a question and an answer. Decoded strings without a ``?`` are
    skipped.

    Args:
        input_text: Text to generate pairs from. May be ``None`` or blank.

    Returns:
        A ``(text, csv_path)`` tuple, one value per interface output.
        ``text`` holds the formatted pairs and ``csv_path`` is the path of
        the CSV written with the same pairs. For ``None`` or blank input the
        tuple is ``("Please enter a text", None)`` and no file is written.
    """
    # Always return one value per declared output; a cleared textbox may
    # arrive as an empty or whitespace-only string rather than None.
    if input_text is None or not input_text.strip():
        return "Please enter a text", None

    # The zero-width split keeps the terminator attached to each sentence but
    # also yields empty / whitespace-only fragments; those are dropped below
    # so the model is never run on blank input.
    sentences = (piece.strip() for piece in re.split(r'(?<=[.!?])', input_text))

    rows = []
    for sentence in sentences:
        if not sentence:
            continue
        input_ids = tokenizer.encode(sentence, return_tensors="pt")
        outputs = model.generate(input_ids, max_length=100, num_return_sequences=1)
        decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Split at the FIRST "?" only, so an answer that itself contains a
        # question mark is kept whole instead of being truncated.
        question, separator, answer = decoded.partition("?")
        if not separator:
            continue
        rows.append((question + "?", answer.strip()))

    result = "".join(
        f"Question: {question}\nAnswer: {answer}\n\n" for question, answer in rows
    )

    # Build the frame once rather than concatenating inside the loop.
    df = pd.DataFrame(rows, columns=['Question', 'Answer'])
    # NOTE(review): the fixed file name is shared by every request, so
    # concurrent users can overwrite each other's download.
    df.to_csv("QAPairs.csv")
    return result, "QAPairs.csv"
# Gradio UI: a single text input and two outputs (the generated pairs as
# text, plus the CSV file offered for download).
title = "Question-Answer Pairs Generation"

input_text = gr.Textbox(lines=4, label="Text:")
output_file = gr.File(label="Download as csv")
output_text = gr.Textbox()

# The output order must match the order of the values returned by
# generate_question_answer_pairs: text first, then the file path.
interface = gr.Interface(
    generate_question_answer_pairs,
    input_text,
    [output_text, output_file],
    title=title,
)
interface.launch()