Buckets:
| import gradio as gr | |
| import torch | |
| from transformers import PegasusForConditionalGeneration, PegasusTokenizer | |
| import re | |
| import os | |
| def load_model(): | |
| """Load the model from local storage""" | |
| torch_device = 'cuda' if torch.cuda.is_available() else 'cpu' | |
| print(f"Using device: {torch_device}") | |
| # Load tokenizer and model from local directory | |
| tokenizer = PegasusTokenizer.from_pretrained('./models') | |
| model = PegasusForConditionalGeneration.from_pretrained('./models').to(torch_device) | |
| return tokenizer, model, torch_device | |
| def split_into_paragraphs(text): | |
| """Split text into paragraphs while preserving empty lines.""" | |
| paragraphs = text.split('\n\n') | |
| return [p.strip() for p in paragraphs if p.strip()] | |
| def split_into_sentences(paragraph): | |
| """Split paragraph into sentences using regex.""" | |
| sentences = re.split(r'(?<=[.!?])\s+', paragraph) | |
| return [s.strip() for s in sentences if s.strip()] | |
| def get_response(input_text, num_return_sequences, tokenizer, model, torch_device): | |
| batch = tokenizer.prepare_seq2seq_batch( | |
| [input_text], | |
| truncation=True, | |
| padding='longest', | |
| max_length=80, | |
| return_tensors="pt" | |
| ).to(torch_device) | |
| translated = model.generate( | |
| **batch, | |
| num_beams=10, | |
| num_return_sequences=num_return_sequences, | |
| temperature=1.0, | |
| repetition_penalty=2.8, | |
| length_penalty=1.2, | |
| max_length=80, | |
| min_length=5, | |
| no_repeat_ngram_size=3 | |
| ) | |
| tgt_text = tokenizer.batch_decode(translated, skip_special_tokens=True) | |
| return tgt_text[0] | |
| def get_response_from_text(context, tokenizer, model, torch_device): | |
| """Process entire text while preserving paragraph structure.""" | |
| paragraphs = split_into_paragraphs(context) | |
| paraphrased_paragraphs = [] | |
| for paragraph in paragraphs: | |
| sentences = split_into_sentences(paragraph) | |
| paraphrased_sentences = [] | |
| for sentence in sentences: | |
| if len(sentence.split()) < 3: | |
| paraphrased_sentences.append(sentence) | |
| continue | |
| try: | |
| paraphrased = get_response(sentence, 1, tokenizer, model, torch_device) | |
| if not any(phrase in paraphrased.lower() for phrase in ['it\'s like', 'in other words']): | |
| paraphrased_sentences.append(paraphrased) | |
| else: | |
| paraphrased_sentences.append(sentence) | |
| except Exception as e: | |
| print(f"Error processing sentence: {e}") | |
| paraphrased_sentences.append(sentence) | |
| paraphrased_paragraphs.append(' '.join(paraphrased_sentences)) | |
| return '\n\n'.join(paraphrased_paragraphs) | |
| def create_interface(): | |
| """Create and configure the Gradio interface""" | |
| # Load model and tokenizer | |
| tokenizer, model, torch_device = load_model() | |
| def greet(context): | |
| return get_response_from_text(context, tokenizer, model, torch_device) | |
| # Create interface with improved styling | |
| iface = gr.Interface( | |
| fn=greet, | |
| inputs=gr.Textbox( | |
| lines=15, | |
| label="Input Text", | |
| placeholder="Enter your text here...", | |
| elem_classes="input-text" | |
| ), | |
| outputs=gr.Textbox( | |
| lines=15, | |
| label="Paraphrased Text", | |
| elem_classes="output-text" | |
| ), | |
| title="Advanced Text Paraphraser", | |
| description="Enter text to generate a high-quality paraphrased version while maintaining paragraph structure.", | |
| theme="default", | |
| css=""" | |
| .input-text, .output-text { | |
| font-size: 16px !important; | |
| font-family: Arial, sans-serif !important; | |
| min-height: 300px !important; | |
| } | |
| """ | |
| ) | |
| return iface | |
| if __name__ == "__main__": | |
| # Create and launch the interface | |
| interface = create_interface() | |
| interface.launch() |
Xet Storage Details
- Size:
- 3.96 kB
- Xet hash:
- 4f1b45b5bd14defbcfc467e2426475c99d3a5b98444e1764cbfde2ca77a96cb7
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.