| | import csv
|
| | import torch
|
| | from transformers import pipeline
|
| | import random
|
| |
|
| |
|
# Text-generation pipeline used for every sample: the Mistral-7B-Instruct
# checkpoint, loaded with float16 weights on device index 0.
chatbot = pipeline(
    task="text-generation",
    model="mistralai/Mistral-7B-Instruct-v0.3",
    device=0,
    torch_dtype=torch.float16,
)
|
| |
|
| |
|
# Sentiment classes requested from the model, in label order.
sentiments = [
    "Positive or Encouraging",
    "Neutral or Factual",
    "Negative or Toxic",
]
|
| |
|
| |
|
# Content formats the prompt can ask the model to imitate.
formats = [
    "Feature Stories",
    "Instructional Manuals",
    "FAQs",
    "Policy Documents",
    "Live Stream Descriptions",
    "Editorial Content",
    "Research Papers",
    "User Manuals",
    "Commentaries",
    "Opinion Pieces",
    "Newsletters",
    "Online Courses",
    "Photo Essays",
    "Annual Reports",
    "User-Generated Content",
    "Testimonials",
    "DIY Content",
    "How-To Videos",
    "Campaign Reports",
    "Legal Briefs",
    "Blog Posts",
    "Case Studies",
    "Tutorials",
    "Interviews",
    "Press Releases",
    "eBooks",
    "Infographics",
    "Webinars",
    "Podcast Descriptions",
    "Video Scripts",
    "Advertisements",
    "Forum Discussions",
    "Whitepapers",
    "Surveys",
    "Product Reviews",
    "Event Summaries",
    "Opinion Editorials",
    "Letters to the Editor",
    "Round-Up Posts",
    "Buying Guides",
    "Checklists",
    "Cheat Sheets",
    "Recipes",
    "Travel Guides",
    "Profiles",
    "Lists",
    "Q&A Sessions",
    "Debates",
    "Polls",
]
|
| |
|
| |
|
# Subject areas the prompt can ask the model to write about.
topics = [
    "Family",
    "Travel",
    "Politics",
    "Science",
    "Health",
    "Technology",
    "Sports",
    "Education",
    "Environment",
    "Economics",
    "Culture",
    "History",
    "Music",
    "Literature",
    "Food",
    "Art",
    "Fashion",
    "Entertainment",
    "Business",
    "Relationships",
    "Fitness",
    "Automotive",
    "Finance",
    "Real Estate",
    "Law",
    "Psychology",
    "Philosophy",
    "Religion",
    "Gardening",
    "DIY",
    "Hobbies",
    "Pets",
    "Career",
    "Marketing",
    "Customer Service",
    "Networking",
    "Innovation",
    "Artificial Intelligence",
    "Sustainability",
    "Social Issues",
    "Digital Media",
    "Programming",
    "Cybersecurity",
    "Astronomy",
    "Geography",
    "Travel Tips",
    "Cooking",
    "Parenting",
    "Productivity",
    "Mindfulness",
    "Mental Health",
    "Self-Improvement",
    "Leadership",
    "Teamwork",
    "Volunteering",
    "Nonprofits",
    "Gaming",
    "E-commerce",
    "Photography",
    "Videography",
    "Film",
    "Television",
    "Streaming Services",
    "Podcasts",
    "Public Speaking",
    "Event Planning",
    "Interior Design",
    "Architecture",
    "Urban Development",
    "Agriculture",
    "Climate Change",
    "Renewable Energy",
    "Space Exploration",
    "Biotechnology",
    "Cryptocurrency",
    "Blockchain",
    "Robotics",
    "Automated Systems",
    "Genetics",
    "Medicine",
    "Pharmacy",
    "Veterinary Science",
    "Marine Biology",
    "Ecology",
    "Conservation",
    "Wildlife",
    "Botany",
    "Zoology",
    "Geology",
    "Meteorology",
    "Aviation",
    "Maritime",
    "Logistics",
    "Supply Chain",
    "Human Resources",
    "Diversity and Inclusion",
    "Ethics",
    "Corporate Governance",
    "Public Relations",
    "Journalism",
    "Advertising",
    "Sales",
    "Customer Experience",
    "Retail",
    "Hospitality",
    "Tourism",
    "Luxury Goods",
    "Consumer Electronics",
    "Fashion Design",
    "Textiles",
    "Jewelry",
    "Cosmetics",
    "Skincare",
    "Perfume",
    "Toys",
    "Gadgets",
    "Home Appliances",
    "Furniture",
    "Home Improvement",
    "Landscaping",
    "Real Estate Investment",
]
|
| |
|
| |
|
# Writing styles/registers the prompt can ask the model to use.
styles = [
    "Super Casual",
    "Internet Slang",
    "Every Day",
    "Formal",
    "Conversational",
    "Bad Grammar and Spelling",
    "Lazy typing",
    "Professional",
    "Academic",
    "Technical",
    "Narrative",
    "Descriptive",
    "Analytical",
    "Critical",
    "Objective",
    "Subjective",
    "Third Person",
    "First Person",
    "Persuasive",
    "Informative",
    "Journalistic",
    "Reflective",
    "DM",
    "Social",
    "Informal",
    "Casual",
    "Colloquial",
]
|
| |
|
| |
|
# Opening phrases the model is told to begin its paragraph with.
# Every entry is unique so that a uniform random pick gives each phrase the
# same chance of being selected.
starting_phrases = [
    "Have you ever wondered", "Let's talk about", "It's interesting how",
    "Did you know", "The reality is", "Many people believe",
    "It's surprising that", "You might not know", "Let's dive into",
    "Here's the thing", "A common misconception is", "It's clear that",
    "Most people don't realize", "One thing to note is",
    "The fact is", "Consider this", "Here's an example",
    "Think about", "For instance", "To illustrate",
    "In my experience", "A key point is", "It's worth noting",
    "Let's explore", "Interestingly enough", "I want to highlight",
    "When it comes to", "The truth is", "Many experts agree",
    "Research shows", "Statistics indicate", "It's often said",
    "In reality", "From my perspective", "Surprisingly",
    "One thing I've noticed", "In recent studies", "Let's break down",
    "People often forget", "You should know", "Interestingly",
    "It turns out", "As it happens", "Experts suggest",
    "The surprising fact is", "It's commonly known", "Let's be honest",
    "The reality of", "It's fascinating that", "Have you noticed",
    "The thing is", "It's a fact that", "Let's not forget",
    "Studies have shown", "A notable point is", "It's often overlooked",
    "An important aspect is", "Let's take a closer look",
    "It's essential to understand", "Interestingly, research suggests",
    "One aspect to consider is", "It's beneficial to know",
    "It's worth considering", "The interesting thing is", "Let's examine",
    "A surprising fact is", "It's helpful to know", "One surprising element is",
    "Imagine this", "Here's a thought", "You might be surprised",
    "Think of it this way", "Here's an idea", "It's funny how",
    "Let me tell you", "Picture this", "The question is",
    "Believe it or not", "You won't believe", "Let's face it",
    "The best part is", "What's interesting is", "I discovered that",
    "It's amazing how", "The funny thing is", "Here's why",
    "What if I told you", "It's worth mentioning", "This reminds me of",
    "Let me explain", "Here's something new", "I realized that",
    "Have you seen", "You might enjoy", "I learned that",
    "It's clear to see", "What's fascinating is", "Here's a question",
    "I heard that", "The cool part is", "Here's what happened",
    "It appears that", "It's evident that", "Let me share",
    "You'll find that", "What's notable is", "Consider the fact that",
    "It's interesting to note", "Hello everyone", "Hi there",
    "Greetings", "Hey folks", "Good morning", "Good afternoon",
    "Good evening", "Hey", "What's up", "Hi", "Hello",
    "Amazing!", "Serious?", "Wow...", "That's pretty cool.",
    "Can you believe it?", "Unbelievable!", "Incredible!", "No way!",
    "Check this out", "Guess what?", "Surprise!", "Fascinating!",
    "Impressive!", "I don't get it?", "Really?", "What?",
    "Why?", "How come?", "Is that so?", "Are you sure?",
    "What do you think?", "By the way", "Just so you know",
    "For your information", "Incidentally", "On a side note",
    "As a reminder", "In addition", "Besides that",
    "While we're on the subject", "Speaking of which",
    "Have you", "Has anyone", "Would we", "Would it be",
    "OK, now", "OK but", "OK you", "OK nobody",
    "Here's a quick fact", "To put it simply", "Here's why this matters",
    "Let's consider", "Now, think about this", "Take this into account",
    "Here's something to think about", "On that note",
    "Just imagine", "That reminds me",
    "As it turns out", "Here's a fun fact", "The reality of it is",
    "By the way, did you know", "Speaking of",
    "Now, let's dive in", "You'll be surprised to know",
    "I recently discovered", "Would you believe", "Can you imagine",
    "What's more", "Even more interesting is",
]
|
| |
|
| |
|
# Path of the generated dataset. Opening it in write mode truncates any
# existing file, so the file starts with just the header row.
csv_file = "sentences.csv"
with open(csv_file, "w", encoding="utf-8", newline="") as file:
    # csv.writer defaults: comma delimiter, '"' quote character, minimal quoting.
    writer = csv.writer(file)
    writer.writerow(("text", "label"))
|
| |
|
| |
|
def ensure_correct_quoting(text: str) -> str:
    """Return *text* enclosed in exactly one pair of double quotes.

    Text that already starts and ends with a double quote is returned
    unchanged; anything else is wrapped in a new pair of double quotes.

    A lone ``"`` is both the first and the last character of the string but
    encloses nothing, so it is wrapped like any other unquoted text rather
    than being mistaken for an already-quoted value.

    NOTE: ``csv.writer`` escapes embedded quote characters on its own, so a
    value returned from here is stored with its surrounding quotes as
    literal data when passed to ``writerow``.
    """
    already_quoted = len(text) >= 2 and text.startswith('"') and text.endswith('"')
    return text if already_quoted else f'"{text}"'
|
| |
|
| |
|
# Total number of labelled rows to generate before stopping.
TARGET_ROWS = 100000

row_count = 0

# Open the output once in append mode (the header row is already in the file)
# and flush after every row, so an interrupted run keeps what was generated
# without paying for a file open/close per sample.
with open(csv_file, mode='a', newline='', encoding='utf-8') as file:
    # QUOTE_NONNUMERIC makes the writer quote every text field and leave the
    # integer label bare. The text is passed in raw: wrapping it in quotes
    # beforehand would make the writer escape them and store them as data.
    writer = csv.writer(file, delimiter=',', quotechar='"', quoting=csv.QUOTE_NONNUMERIC)

    while row_count < TARGET_ROWS:
        # One sample per sentiment per pass keeps the labels balanced; idx is
        # the sentiment's position in `sentiments` and is stored as the label.
        for idx, sentiment in enumerate(sentiments):
            # Sample each prompt attribute independently of the label.
            # Stepping the lists in lockstep with the sentiment loop ties any
            # list whose length is a multiple of len(sentiments) (styles has
            # 27 entries) to a single sentiment, leaking the label through
            # that attribute.
            format_type = random.choice(formats)
            topic = random.choice(topics)
            style = random.choice(styles)
            start_phrase = random.choice(starting_phrases)

            prompt = f"Start your paragraph with '{start_phrase}'. Write a single paragraph of text. Format: {format_type}. Topic: {topic}. Vibe: {sentiment}. Style: {style}."

            response = chatbot(prompt, max_new_tokens=100)
            print(f"Full model response: {response}")

            generated_text = response[0]['generated_text']

            # Drop only the leading echo of the prompt; a blanket replace()
            # would also delete any later repetition inside the answer.
            if generated_text.startswith(prompt):
                generated_text = generated_text[len(prompt):]

            # Keep just the first line of the answer as the sample text.
            clean_text = generated_text.strip().split('\n')[0]

            writer.writerow([clean_text, idx])
            file.flush()

            row_count += 1
            print(f"Response for sentiment '{sentiment}' saved to {csv_file}. Total rows: {row_count}")

            # The target may be reached mid-pass; stop without finishing it.
            if row_count >= TARGET_ROWS:
                break

print("All responses saved. Total rows:", row_count)
|
| |
|