| import csv
|
| import torch
|
| from transformers import pipeline
|
|
|
|
|
# Text-generation pipeline used to produce every sentence of the dataset.
# Run on the first CUDA GPU in half precision when one is available; otherwise
# fall back to the CPU (device=-1) in float32 instead of failing at start-up,
# since half precision is poorly supported for CPU inference.
_cuda_available = torch.cuda.is_available()

chatbot = pipeline(
    "text-generation",
    model="mistralai/Mistral-7B-Instruct-v0.3",
    torch_dtype=torch.float16 if _cuda_available else torch.float32,
    device=0 if _cuda_available else -1,
)
|
|
|
|
|
# Sentiment classes requested from the model. The generation loop enumerates
# this list and writes the list position as the numeric label, so the order
# here fixes the label encoding.
sentiments = [
    "Positive",
    "Neutral",
    "Negative",
]
|
|
|
|
|
# Content types inserted into the prompt ("Content type: ..."). The generation
# loop walks this list cyclically, so the order determines which format is
# paired with which topic and sentiment.
formats = [
    "Feature Stories", "Instructional Manuals", "FAQs", "Policy Documents",
    "Live Stream Descriptions", "Editorial Content", "Research Papers",
    "User Manuals", "Commentaries", "Opinion Pieces", "Newsletters",
    "Online Courses", "Photo Essays", "Annual Reports",
    "User-Generated Content", "Testimonials", "DIY Content", "How-To Videos",
    "Campaign Reports", "Legal Briefs", "Blog Posts", "Case Studies",
    "Tutorials", "Interviews", "Press Releases", "eBooks", "Infographics",
    "Webinars", "Podcast Descriptions", "Video Scripts", "Advertisements",
    "Forum Discussions", "Whitepapers", "Surveys", "Product Reviews",
    "Event Summaries", "Opinion Editorials", "Letters to the Editor",
    "Round-Up Posts", "Buying Guides", "Checklists", "Cheat Sheets",
    "Recipes", "Travel Guides", "Profiles", "Lists", "Q&A Sessions",
    "Debates", "Polls",
]
|
|
|
|
|
# Subject areas inserted into the prompt ("Topic: ..."). The generation loop
# walks this list cyclically, so the order determines which topic is paired
# with which format and sentiment.
topics = [
    "Family", "Travel", "Politics", "Science", "Health", "Technology",
    "Sports", "Education", "Environment", "Economics", "Culture", "History",
    "Music", "Literature", "Food", "Art", "Fashion", "Entertainment",
    "Business", "Relationships", "Fitness", "Automotive", "Finance",
    "Real Estate", "Law", "Psychology", "Philosophy", "Religion",
    "Gardening", "DIY", "Hobbies", "Pets", "Career", "Marketing",
    "Customer Service", "Networking", "Innovation",
    "Artificial Intelligence", "Sustainability", "Social Issues",
    "Digital Media", "Programming", "Cybersecurity", "Astronomy",
    "Geography", "Travel Tips", "Cooking", "Parenting", "Productivity",
    "Mindfulness", "Mental Health", "Self-Improvement", "Leadership",
    "Teamwork", "Volunteering", "Nonprofits", "Gaming", "E-commerce",
    "Photography", "Videography", "Film", "Television",
    "Streaming Services", "Podcasts", "Public Speaking", "Event Planning",
    "Interior Design", "Architecture", "Urban Development", "Agriculture",
    "Climate Change", "Renewable Energy", "Space Exploration",
    "Biotechnology", "Cryptocurrency", "Blockchain", "Robotics",
    "Automated Systems", "Genetics", "Medicine", "Pharmacy",
    "Veterinary Science", "Marine Biology", "Ecology", "Conservation",
    "Wildlife", "Botany", "Zoology", "Geology", "Meteorology", "Aviation",
    "Maritime", "Logistics", "Supply Chain", "Human Resources",
    "Diversity and Inclusion", "Ethics", "Corporate Governance",
    "Public Relations", "Journalism", "Advertising", "Sales",
    "Customer Experience", "Retail", "Hospitality", "Tourism",
    "Luxury Goods", "Consumer Electronics", "Fashion Design", "Textiles",
    "Jewelry", "Cosmetics", "Skincare", "Perfume", "Toys", "Gadgets",
    "Home Appliances", "Furniture", "Home Improvement", "Landscaping",
    "Real Estate Investment",
]
|
|
|
|
|
# Path of the dataset file produced by this script.
csv_file = "sentences.csv"

# Create (or truncate) the output file so that it holds only the header row;
# the data rows are appended later in the script.
with open(csv_file, "w", newline="", encoding="utf-8") as header_file:
    header_writer = csv.writer(
        header_file,
        delimiter=",",
        quotechar='"',
        quoting=csv.QUOTE_MINIMAL,
    )
    header_writer.writerow(["text", "label"])
|
|
|
|
|
def ensure_correct_quoting(text: str) -> str:
    """Return *text* wrapped in exactly one pair of double quotes.

    Text that is already enclosed in double quotes is returned unchanged;
    anything else gets a leading and a trailing ``"`` added. Inner quote
    characters are not escaped.

    A string needs at least two characters to count as already wrapped, so a
    lone ``"`` is wrapped like any other text instead of being returned as an
    unbalanced quote.

    Note: ``csv.writer`` applies its own quoting and escaping, so quote
    characters added here become part of the field value when the result is
    passed through it.

    Args:
        text: The string to wrap.

    Returns:
        The input itself if it was already wrapped, otherwise ``'"' + text + '"'``.
    """
    already_wrapped = (
        len(text) >= 2 and text.startswith('"') and text.endswith('"')
    )
    return text if already_wrapped else f'"{text}"'
|
|
|
|
|
# Number of data rows to generate before the script stops.
TARGET_ROWS = 100000

row_count = 0
format_index = 0
topic_index = 0

# Keep one append-mode handle open for the whole run instead of reopening the
# file for every row. The header row is written earlier in the script.
with open(csv_file, mode='a', newline='', encoding='utf-8') as out_file:
    writer = csv.writer(out_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)

    while row_count < TARGET_ROWS:
        # One pass over the sentiments produces one row per class; idx (the
        # position in `sentiments`) is stored as the numeric label.
        for idx, sentiment in enumerate(sentiments):
            # The format and topic indices advance once per row and wrap around.
            format_type = formats[format_index % len(formats)]
            format_index += 1
            topic = topics[topic_index % len(topics)]
            topic_index += 1

            prompt = f"Write a single sentence of web content in Croatian. Content type: {format_type}. Topic: {topic}. Sentiment: {sentiment}."

            response = chatbot(prompt, max_new_tokens=100)
            print(f"Full model response: {response}")

            generated_text = response[0]['generated_text']

            # Remove the prompt text from the output and keep only the first
            # line of what remains.
            clean_text = generated_text.replace(prompt, "").strip().split('\n')[0].strip()

            # Presumably the model sometimes wraps its sentence in double
            # quotes; drop one such pair so every stored sentence has the same
            # shape. csv.writer quotes and escapes fields itself, so the text
            # must NOT be wrapped in literal quotes before writing: those
            # characters would be escaped and stored as part of the value.
            if len(clean_text) >= 2 and clean_text.startswith('"') and clean_text.endswith('"'):
                clean_text = clean_text[1:-1].strip()

            writer.writerow([clean_text, idx])
            # Flush after each row so an interrupted run keeps what it has
            # generated so far, as reopening the file per row used to do.
            out_file.flush()

            row_count += 1
            print(f"Response for sentiment '{sentiment}' saved to {csv_file}. Total rows: {row_count}")

            # The target is not a multiple of len(sentiments), so the pass
            # over the sentiments may have to stop part-way through.
            if row_count >= TARGET_ROWS:
                break

print("All responses saved. Total rows:", row_count)
|
|
|