# NOTE: snippet originates from a Hugging Face Space (page-status residue removed).
import logging

from transformers import AutoModelForSeq2SeqLM, T5Tokenizer, pipeline
class QGenerator:
    """Generate questions from free text using a T5 question-generation model.

    Wraps the ``valhalla/t5-small-qg-hl`` checkpoint in a
    ``text2text-generation`` pipeline and produces one candidate question
    per input sentence.
    """

    def __init__(self) -> None:
        """Load the tokenizer, model, and generation pipeline.

        Downloads the checkpoint on first use (network/disk side effects).
        """
        # use_fast=False: stick to the slow sentencepiece tokenizer for this
        # checkpoint — presumably the fast variant misbehaves here; confirm.
        tokenizer = T5Tokenizer.from_pretrained(
            "valhalla/t5-small-qg-hl", use_fast=False
        )
        model = AutoModelForSeq2SeqLM.from_pretrained("valhalla/t5-small-qg-hl")
        self.qg = pipeline("text2text-generation", model=model, tokenizer=tokenizer)

    def split_sentences(self, text: str) -> list[str]:
        """Split *text* on '.' into stripped, non-empty sentence fragments.

        Simple heuristic splitting; for better results use nltk or spacy.
        Abbreviations ("e.g.") and decimals will be split incorrectly.
        """
        return [s.strip() for s in text.split('.') if s.strip()]

    def chunk_text(self, text: str, chunk_size: int = 512) -> list[str]:
        """Split *text* into consecutive character chunks of *chunk_size*.

        The final chunk may be shorter; an empty string yields an empty list.
        """
        return [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]

    def generate(self, text: str, max_questions: int = 5) -> list[str]:
        """Return up to *max_questions* unique questions generated from *text*.

        One generation attempt is made per sentence; empty outputs and
        duplicates are dropped. Sentences that make the pipeline raise are
        skipped (best-effort), with the error logged.
        """
        questions: list[str] = []
        for sentence in self.split_sentences(text):
            if len(questions) >= max_questions:
                break
            # Prompt format expected by the valhalla t5-small-qg checkpoints.
            input_text = f"generate question: {sentence} </s>"
            # Keep the try body minimal: only the pipeline call can
            # legitimately fail; result handling stays outside.
            try:
                result = self.qg(
                    input_text, max_length=64, num_return_sequences=1
                )[0]
            except Exception:
                # Best-effort: skip the offending sentence but record why
                # (logging instead of print, so callers control verbosity).
                logging.getLogger(__name__).exception(
                    "Error generating question for sentence: %r", sentence
                )
                continue
            question = result["generated_text"]
            if question and question not in questions:
                questions.append(question)
        return questions