smart-line-bot / scripts /generate_test_data.py
Smiel2's picture
Initial commit
2eae977 verified
Raw
History Blame Contribute Delete
7.81 kB
#!/usr/bin/env python
"""
Test data generator for development and testing.
Generates realistic sample data for users, LINE users, scraping jobs, and AI conversations.
"""
import os
import sys
import random
import argparse
from datetime import datetime, timedelta
from pathlib import Path
# Add the directory above this script's folder to sys.path so the `app` imports below resolve
sys.path.insert(0, str(Path(__file__).parent.parent))
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from faker import Faker
from app.core.config import settings
from app.models.user import User
from app.models.line_user import LINEUser
from app.models.scraping_job import ScrapingJob
from app.models.scraping_result import ScrapingResult
from app.models.ai_conversation import AIConversation, AIMessage
# Shared Faker instance configured with the zh_TW and en_US locales.
fake = Faker(["zh_TW", "en_US"])
# Seed Faker and the stdlib random module with a fixed value so the
# pseudo-random values drawn by the generators are repeatable between runs.
Faker.seed(42)
random.seed(42)
def get_db_session():
    """Open a new SQLAlchemy session for the configured database.

    An engine is built from ``settings.DATABASE_URL``; a session factory bound
    to that engine then produces the session that is returned. This function
    does not close the session, so the caller has to.
    """
    session_factory = sessionmaker(bind=create_engine(settings.DATABASE_URL))
    return session_factory()
def generate_users(session, count: int = 10) -> list:
    """Insert ``count`` sample ``User`` rows and commit them.

    Users are numbered from 1 (``user1@example.com`` / ``user_1``), get a
    Faker-generated full name, the password ``password123`` and a creation
    time between 1 and 30 days before now.

    Args:
        session: SQLAlchemy session the users are added to and committed on.
        count: Number of users to create.

    Returns:
        The list of created ``User`` objects, in creation order.
    """
    print(f"Generating {count} users...")
    created = []
    for index in range(count):
        number = index + 1
        display_name = fake.name()
        now = datetime.utcnow()
        age = timedelta(days=random.randint(1, 30))
        account = User(
            email=f"user{number}@example.com",
            username=f"user_{number}",
            full_name=display_name,
            is_active=True,
            is_superuser=False,
            created_at=now - age,
        )
        account.set_password("password123")
        session.add(account)
        created.append(account)

    session.commit()
    print(f"Created {len(created)} users")
    return created
def generate_line_users(session, users: list, count: int = 20) -> list:
    """Insert ``count`` sample ``LINEUser`` rows and commit them.

    Each LINE user is attached to a randomly chosen entry of ``users`` and
    receives a random ``U``-prefixed 12-digit id, a Faker-generated display
    name and status message, and a creation time 1 to 30 days before now.

    Args:
        session: SQLAlchemy session the rows are added to and committed on.
        users: Existing users to link to; must be non-empty when ``count > 0``
            because ``random.choice`` is applied to it.
        count: Number of LINE users to create.

    Returns:
        The list of created ``LINEUser`` objects, in creation order.
    """
    print(f"Generating {count} LINE users...")
    profiles = []
    for index in range(count):
        owner = random.choice(users)
        line_id = f"U{random.randint(100000000000, 999999999999)}"
        owner_id = owner.id
        name = fake.name()
        avatar = f"https://example.com/avatar/{index}.jpg"
        status_text = fake.sentence()
        now = datetime.utcnow()
        created = now - timedelta(days=random.randint(1, 30))

        profile = LINEUser(
            line_user_id=line_id,
            user_id=owner_id,
            display_name=name,
            picture_url=avatar,
            status_message=status_text,
            is_followed=True,
            created_at=created,
        )
        session.add(profile)
        profiles.append(profile)

    session.commit()
    print(f"Created {len(profiles)} LINE users")
    return profiles
def generate_scraping_jobs(session, users: list, count: int = 50) -> list:
    """Insert ``count`` sample scraping jobs (plus some results) and commit.

    Each job belongs to a randomly chosen user and gets a weighted-random
    status (pending 30, running 20, completed 40, failed 10). ``started_at``
    is set for running/completed/failed jobs, ``completed_at`` for
    completed/failed jobs, and ``error_message`` only for failed jobs.
    Roughly half of the completed jobs also get one ``ScrapingResult``.

    Args:
        session: SQLAlchemy session the rows are added to and committed on.
        users: Existing users to own the jobs; must be non-empty when
            ``count > 0`` because ``random.choice`` is applied to it.
        count: Number of jobs to create.

    Returns:
        The list of created ``ScrapingJob`` objects, in creation order.

    Raises:
        IndexError: If ``users`` is empty and ``count`` is positive.
    """
    print(f"Generating {count} scraping jobs...")
    jobs = []
    sources = ["news", "ecommerce", "social", "blog", "forum"]
    statuses = ["pending", "running", "completed", "failed"]
    for i in range(count):
        user = random.choice(users)
        status = random.choices(statuses, weights=[30, 20, 40, 10])[0]
        created_at = datetime.utcnow() - timedelta(days=random.randint(1, 30))
        has_started = status in ("running", "completed", "failed")
        has_finished = status in ("completed", "failed")
        # Keyword order is kept as in the original so the seeded random
        # sequence (source, priority, started_at, completed_at) is unchanged.
        job = ScrapingJob(
            user_id=user.id,
            url=f"https://example{i}.com/page",
            source=random.choice(sources),
            status=status,
            priority=random.randint(1, 5),
            created_at=created_at,
            started_at=(
                created_at + timedelta(minutes=random.randint(1, 60))
                if has_started
                else None
            ),
            completed_at=(
                created_at + timedelta(hours=random.randint(1, 5))
                if has_finished
                else None
            ),
            error_message=fake.sentence() if status == "failed" else None,
        )
        session.add(job)
        jobs.append(job)

        # Add a result for about half of the completed jobs.
        if status == "completed" and random.random() > 0.5:
            # A pending object has no database-assigned primary key yet;
            # flush so job.id is populated before it is used as a foreign key
            # (otherwise the result would be stored with job_id=None).
            session.flush()
            # NOTE(review): `metadata` is a reserved attribute name on
            # SQLAlchemy declarative classes -- confirm ScrapingResult
            # really accepts and persists this keyword.
            result = ScrapingResult(
                job_id=job.id,
                url=job.url,
                title=f"Page Title {i}",
                content=fake.paragraph(),
                metadata={"scraped_by": "Botsaurus", "version": "1.0"},
                created_at=job.completed_at,
            )
            session.add(result)

    session.commit()
    print(f"Created {len(jobs)} scraping jobs")
    return jobs
def generate_ai_conversations(session, users: list, count: int = 30) -> list:
    """Insert ``count`` sample AI conversations (plus messages) and commit.

    Each conversation belongs to a randomly chosen user, uses the model name
    ``gpt-4`` and a random ``is_active`` flag. About 70% of conversations also
    get 2 to 10 messages that alternate between the ``user`` and ``assistant``
    roles, spaced five minutes apart starting at the conversation's creation
    time.

    Args:
        session: SQLAlchemy session the rows are added to and committed on.
        users: Existing users to own the conversations; must be non-empty
            when ``count > 0`` because ``random.choice`` is applied to it.
        count: Number of conversations to create.

    Returns:
        The list of created ``AIConversation`` objects, in creation order.

    Raises:
        IndexError: If ``users`` is empty and ``count`` is positive.
    """
    print(f"Generating {count} AI conversations...")
    conversations = []
    for i in range(count):
        user = random.choice(users)
        created_at = datetime.utcnow() - timedelta(days=random.randint(1, 30))
        conv = AIConversation(
            user_id=user.id,
            title=f"Conversation {i+1}",
            model="gpt-4",
            is_active=random.choice([True, False]),
            created_at=created_at,
            updated_at=created_at + timedelta(hours=random.randint(1, 10)),
        )
        session.add(conv)
        conversations.append(conv)

        # Add messages to some conversations.
        if random.random() > 0.3:
            # A pending object has no database-assigned primary key yet;
            # flush so conv.id is populated before the messages reference it
            # (otherwise they would be stored with conversation_id=None).
            session.flush()
            num_messages = random.randint(2, 10)
            for j in range(num_messages):
                # Even positions are user turns, odd positions are replies.
                is_user = j % 2 == 0
                msg = AIMessage(
                    conversation_id=conv.id,
                    role="user" if is_user else "assistant",
                    content=fake.paragraph() if is_user else fake.sentence(),
                    created_at=created_at + timedelta(minutes=j * 5),
                )
                session.add(msg)

    session.commit()
    print(f"Created {len(conversations)} AI conversations")
    return conversations
def generate_all(session, counts: dict):
    """Run every generator in dependency order and print a summary.

    Users are created first because the LINE users, scraping jobs and AI
    conversations are each generated from the resulting user list.

    Args:
        session: SQLAlchemy session handed to each generator.
        counts: Mapping with optional keys ``users``, ``line_users``,
            ``scraping_jobs`` and ``ai_conversations``; missing keys fall
            back to 10, 20, 50 and 30 respectively.
    """
    banner = "=" * 50

    print(banner)
    print("Generating test data...")
    print(banner)

    user_total = counts.get("users", 10)
    users = generate_users(session, user_total)

    line_total = counts.get("line_users", 20)
    line_users = generate_line_users(session, users, line_total)

    job_total = counts.get("scraping_jobs", 50)
    jobs = generate_scraping_jobs(session, users, job_total)

    conv_total = counts.get("ai_conversations", 30)
    convs = generate_ai_conversations(session, users, conv_total)

    print(banner)
    print("Test data generation complete!")
    print(banner)
    print(f"Users: {len(users)}")
    print(f"LINE Users: {len(line_users)}")
    print(f"Scraping Jobs: {len(jobs)}")
    print(f"AI Conversations: {len(convs)}")
def clear_all_data(session):
    """Delete every row from the six generated tables and commit.

    Args:
        session: SQLAlchemy session used for the bulk deletes and the commit.
    """
    print("Clearing all data...")
    # Rows carrying a conversation_id / job_id / user_id are removed before
    # the rows those ids point at.
    deletion_order = (
        AIMessage,
        AIConversation,
        ScrapingResult,
        ScrapingJob,
        LINEUser,
        User,
    )
    for model in deletion_order:
        session.query(model).delete()
    session.commit()
    print("All data cleared!")
def main():
    """Parse the command line, optionally wipe the tables, then generate data.

    Flags: ``--clear`` deletes existing rows first; ``--users``,
    ``--line-users``, ``--scraping-jobs`` and ``--ai-conversations`` set how
    many rows of each kind are generated. The database session is always
    closed, even when clearing or generation raises.
    """
    parser = argparse.ArgumentParser(description="Generate test data for the application")
    parser.add_argument("--clear", action="store_true", help="Clear all data before generating")
    count_options = (
        ("--users", 10, "Number of users to generate"),
        ("--line-users", 20, "Number of LINE users to generate"),
        ("--scraping-jobs", 50, "Number of scraping jobs to generate"),
        ("--ai-conversations", 30, "Number of AI conversations to generate"),
    )
    for flag, default, help_text in count_options:
        parser.add_argument(flag, type=int, default=default, help=help_text)
    args = parser.parse_args()

    # argparse stores "--line-users" as args.line_users, and so on.
    count_keys = ("users", "line_users", "scraping_jobs", "ai_conversations")
    counts = {key: getattr(args, key) for key in count_keys}

    session = get_db_session()
    try:
        if args.clear:
            clear_all_data(session)
        generate_all(session, counts)
    finally:
        session.close()


# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()