Spaces:
Sleeping
Sleeping
| from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer | |
| import re | |
# Checkpoint identifier handed to both from_pretrained() calls below.
model_checkpoint = "24NLPGroupO/EmailGeneration"

# Tokenizer and causal language model are loaded from the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(
    model_checkpoint,
    truncation=True,
)
model = AutoModelForCausalLM.from_pretrained(model_checkpoint)

# Module-level text-generation pipeline; generate_email() calls it.
generator = pipeline(
    task='text-generation',
    model=model,
    tokenizer=tokenizer,
)
# Street-type abbreviations recognised when stripping postal addresses.
# Order matters: the patterns are applied one after another in this order.
_STREET_SUFFIXES = ('St', 'Ave', 'Rd', 'Ln', 'Blvd', 'Dr', 'Ct')


def clean_generated_text(text):
    """Strip boilerplate and placeholder tokens from a generated email.

    The following steps are applied in order:

    1. A leading ``Re:`` or ``Fwd:`` marker is removed.
    2. Everything from the first occurrence of a trailing-section marker
       (signature, phone, email, CC list, attachments, copyright notice)
       to the end of the text is dropped.
    3. Placeholder tokens are substituted: ``URL`` is removed,
       ``CURRENCYNUMBER`` becomes ``USD 100`` and ``NUMBER`` becomes ``10``.
    4. Everything from ``About Us`` onwards is dropped.
    5. Street addresses of the form ``<digits> <word> <suffix> ... <5 digits>``
       are removed for each suffix in ``_STREET_SUFFIXES``.

    Args:
        text: The raw generated text.

    Returns:
        The cleaned text with leading and trailing whitespace stripped.
    """
    # Remove a reply / forward mark at the very start of the text.
    text = re.sub(r'^(Re:|Fwd:)', '', text)

    # Cut the text at the first marker that introduces trailing boilerplate.
    text = re.sub(r'Best regards,.*$', '', text, flags=re.DOTALL)  # signature
    text = re.sub(r'PHONE.*$', '', text, flags=re.DOTALL)  # phone numbers
    text = re.sub(r'Email:.*$', '', text, flags=re.DOTALL)  # email addresses
    text = re.sub(r'Cc:.*$', '', text, flags=re.DOTALL)  # CC list
    text = re.sub(r'\* Attachments:.*', '', text, flags=re.S)  # attachments
    text = re.sub(r'©️ .*$', '', text, flags=re.DOTALL)  # copyright notice

    # Placeholder tokens. CURRENCYNUMBER must be handled before NUMBER:
    # otherwise the NUMBER rule rewrites it to "CURRENCY10" and the currency
    # rule can never match.
    text = re.sub(r'URL', '', text)
    text = re.sub(r'CURRENCYNUMBER', 'USD 100', text)
    text = re.sub(r'NUMBER', '10', text)

    # Drop 'About Us' and all following text.
    text = re.sub(r'About Us.*', '', text, flags=re.DOTALL)

    # Remove postal addresses, one street type at a time.
    for suffix in _STREET_SUFFIXES:
        address_pattern = r'\d+ [^\s]+ ' + suffix + r'\.?,?.*?\d{5}'
        text = re.sub(address_pattern, '', text)

    return text.strip()
def generate_email(product, gender, profession, hobby):
    """Generate an email for the given attributes and return the longest draft.

    The four arguments are joined with single spaces to form the prompt,
    which is passed to the module-level ``generator`` pipeline. Two
    candidates are sampled, each is passed through
    ``clean_generated_text``, and the longest cleaned candidate is returned.

    Args:
        product: Product text placed first in the prompt.
        gender: Gender text placed second in the prompt.
        profession: Profession text placed third in the prompt.
        hobby: Hobby text placed last in the prompt.

    Returns:
        The longest of the cleaned candidate texts.
    """
    prompt = f"{product} {gender} {profession} {hobby}"

    sampling_options = {
        'max_length': 256,          # upper bound passed to the pipeline
        'top_k': 40,                # top-k sampling cutoff
        'top_p': 0.6,               # nucleus sampling threshold
        'temperature': 0.4,         # sampling temperature
        'repetition_penalty': 1.5,  # discourage repeated content
        'num_return_sequences': 2,  # two candidates per call
        'do_sample': True,
    }
    candidates = generator(prompt, **sampling_options)

    # Selection is by length only: the longest cleaned candidate wins.
    return max(
        (clean_generated_text(candidate['generated_text']) for candidate in candidates),
        key=len,
    )