Spaces:
Build error
Build error
| import gradio as gr | |
| from transformers import AutoTokenizer, AutoModelForCausalLM | |
| import torch | |
| import fitz # PyMuPDF | |
| from docx import Document | |
# Model setup: the tokenizer and the causal language model are created once,
# when this module is first executed, and are then used as module-level
# globals by the request handler.
# NOTE(review): the weights are requested in float16 and the model is never
# moved to another device in this file - confirm half precision is supported
# by the hardware this runs on.
model_name = "microsoft/phi-2"
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    torch_dtype=torch.float16,
)
def extract_text_from_pdf(file):
    """Return the concatenated text of every page of a PDF.

    Args:
        file: Either a binary file-like object (anything with ``read()``)
            holding the PDF bytes, or a path to a PDF file on disk.

    Returns:
        The text of all pages joined in page order; an empty string when the
        document has no extractable text.
    """
    if hasattr(file, "read"):
        # In-memory bytes carry no file name, so the type is given explicitly.
        doc = fitz.open(stream=file.read(), filetype="pdf")
    else:
        # str() flattens str subclasses and pathlib paths to a plain path.
        doc = fitz.open(str(file))

    # The context manager closes the document even if a page fails to parse.
    with doc:
        return "".join(page.get_text() for page in doc)
def extract_text_from_docx(file):
    """Return the text of a DOCX file, one paragraph per line.

    Args:
        file: Whatever ``docx.Document`` accepts as its source argument.

    Returns:
        The ``text`` of each paragraph in ``Document(file).paragraphs``,
        joined with newline characters.
    """
    paragraphs = Document(file).paragraphs
    return "\n".join(para.text for para in paragraphs)
def convert_to_story(file):
    """Turn an uploaded PDF or DOCX news article into a short children's story.

    Args:
        file: The uploaded file, or ``None`` when nothing was uploaded. The
            extension is taken from ``file.name`` when that attribute exists
            and is non-empty, otherwise from ``str(file)``.

    Returns:
        The generated story text, or a short user-facing message when no file
        was given, the extension is neither ``pdf`` nor ``docx``, or no text
        could be extracted from the file.
    """
    if file is None:
        return "Please upload a file."

    # Accept both objects exposing ``.name`` and plain path strings.
    file_name = str(getattr(file, "name", None) or file)
    file_extension = file_name.split('.')[-1].lower()
    if file_extension == 'pdf':
        text = extract_text_from_pdf(file)
    elif file_extension == 'docx':
        text = extract_text_from_docx(file)
    else:
        return "Unsupported file format. Please upload a PDF or DOCX file."

    # Nothing to rewrite (e.g. an empty document): do not query the model.
    if not text or not text.strip():
        return "No readable text was found in the uploaded file."

    instruction = (
        "Convert the following news article into a short children's story "
        "(maximum 200 words):\n\n"
    )
    cue = "\n\nChildren's story:"
    max_prompt_tokens = 1024

    # Tokenize the three prompt pieces separately so that only the article is
    # truncated. Truncating the assembled prompt from the right would drop the
    # trailing cue for long articles, leaving the model without its
    # instruction to start the story.
    instruction_ids = tokenizer(instruction, add_special_tokens=False)["input_ids"]
    cue_ids = tokenizer(cue, add_special_tokens=False)["input_ids"]
    article_budget = max_prompt_tokens - len(instruction_ids) - len(cue_ids)
    article_ids = tokenizer(
        text,
        add_special_tokens=False,
        truncation=True,
        max_length=article_budget,
    )["input_ids"]

    # Build the batch of one directly on the device that holds the model.
    input_ids = torch.tensor(
        [instruction_ids + article_ids + cue_ids],
        device=model.device,
    )
    attention_mask = torch.ones_like(input_ids)

    with torch.no_grad():
        outputs = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_new_tokens=200,
            temperature=0.7,
            top_p=0.95,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # The returned sequence starts with the prompt tokens; decode only what
    # was generated after them instead of searching the text for the cue,
    # which misfires when the article itself contains that phrase.
    generated_ids = outputs[0][input_ids.shape[1]:]
    return tokenizer.decode(generated_ids, skip_special_tokens=True).strip()
# Web UI: a single file-upload input wired to convert_to_story, with the
# result shown as plain text. The upload widget is limited to the two
# extensions that convert_to_story can handle, so unsupported files are
# rejected in the browser instead of after upload.
iface = gr.Interface(
    fn=convert_to_story,
    inputs=gr.File(
        label="Upload PDF or DOCX file",
        file_types=[".pdf", ".docx"],
    ),
    outputs="text",
    title="News to Children's Story Converter",
    description="Upload a news article in PDF or DOCX format to convert it into a short children's story.",
)
# Starts the Gradio server as soon as this module is executed.
iface.launch()