Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import numpy as np | |
| from database_connection import DatabaseConnection | |
def create_basic_training_data():
    """Create basic training data for the course recommender - DEPRECATED.

    This generator is retired: every call prints a deprecation warning to
    stdout and then raises, so no data is ever produced or returned. The
    error message directs callers to the student feedback data exposed by
    the /student_feedback_counts endpoint instead.

    The synthetic-sample generation that previously followed the ``raise``
    could never execute and referenced an undefined name (``courses``), so
    it is intentionally not kept here as dead code.

    Raises:
        ValueError: Always, to signal that basic training data is no
            longer supported.
    """
    # Keep the stdout warning in addition to the exception: callers that
    # catch ValueError broadly still leave a visible trace in the logs.
    print("WARNING: Basic training data is deprecated. Use student feedback data instead.")
    raise ValueError(
        "Basic training data is no longer used. "
        "Please use student feedback data from /student_feedback_counts endpoint."
    )
def save_basic_data():
    """Build the basic training data and write it to a CSV file.

    Calls ``create_basic_training_data()``, writes the result to
    ``basic_training_data.csv`` in the current working directory without
    the index column, prints the number of rows saved, and returns the
    data. Any exception raised by ``create_basic_training_data()``
    propagates to the caller before anything is written.
    """
    df = create_basic_training_data()

    # index=False keeps the row index out of the CSV.
    output_path = 'basic_training_data.csv'
    df.to_csv(output_path, index=False)

    print(f"Basic training data saved with {len(df)} samples")
    return df
# Script entry point: run the export only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    save_basic_data()