markobinario committed on
Commit
0e1524e
·
verified ·
1 Parent(s): 8691479

Update basic_training_data.py

Browse files
Files changed (1) hide show
  1. basic_training_data.py +64 -70
basic_training_data.py CHANGED
@@ -1,70 +1,64 @@
1
- import pandas as pd
2
- import numpy as np
3
-
4
- def create_basic_training_data():
5
- """Create basic training data for the course recommender"""
6
-
7
- # Define available courses
8
- courses = [
9
- "Computer Science", "Information Technology", "Data Science",
10
- "Software Engineering", "Cybersecurity", "Computer Engineering",
11
- "Business Administration", "Marketing", "Finance", "Accounting",
12
- "Psychology", "Education", "Literature", "History", "Philosophy",
13
- "Nursing", "Medicine", "Engineering", "Architecture", "Design"
14
- ]
15
-
16
- # Define strands
17
- strands = ["STEM", "ABM", "HUMSS", "GAS", "TVL"]
18
-
19
- # Define common hobbies
20
- hobbies_list = [
21
- "Programming", "Reading", "Sports", "Music", "Art", "Gaming",
22
- "Photography", "Writing", "Dancing", "Cooking", "Traveling",
23
- "Mathematics", "Science", "History", "Literature", "Technology"
24
- ]
25
-
26
- # Generate synthetic data
27
- np.random.seed(42) # For reproducible results
28
- n_samples = 1000
29
-
30
- data = []
31
- for _ in range(n_samples):
32
- # Generate random but realistic data
33
- stanine = np.random.randint(1, 10)
34
- gwa = np.random.uniform(75, 100) # GWA between 75-100
35
- strand = np.random.choice(strands)
36
- course = np.random.choice(courses)
37
- hobbies = np.random.choice(hobbies_list, size=np.random.randint(1, 4), replace=False)
38
- hobbies_str = ", ".join(hobbies)
39
-
40
- # Generate rating based on some logic
41
- if stanine >= 7 and gwa >= 85:
42
- rating = np.random.choice([4, 5], p=[0.3, 0.7])
43
- elif stanine >= 5 and gwa >= 80:
44
- rating = np.random.choice([3, 4, 5], p=[0.2, 0.5, 0.3])
45
- else:
46
- rating = np.random.choice([1, 2, 3, 4], p=[0.1, 0.3, 0.4, 0.2])
47
-
48
- count = np.random.randint(1, 10)
49
-
50
- data.append({
51
- 'course': course,
52
- 'stanine': stanine,
53
- 'gwa': gwa,
54
- 'strand': strand,
55
- 'rating': rating,
56
- 'hobbies': hobbies_str,
57
- 'count': count
58
- })
59
-
60
- return pd.DataFrame(data)
61
-
62
- def save_basic_data():
63
- """Save basic training data to CSV"""
64
- df = create_basic_training_data()
65
- df.to_csv('basic_training_data.csv', index=False)
66
- print(f"Basic training data saved with {len(df)} samples")
67
- return df
68
-
69
- if __name__ == "__main__":
70
- save_basic_data()
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ from database_connection import DatabaseConnection
4
+
5
+ def create_basic_training_data():
6
+ """Create basic training data for the course recommender - DEPRECATED"""
7
+ print("WARNING: Basic training data is deprecated. Use student feedback data instead.")
8
+ raise ValueError("Basic training data is no longer used. Please use student feedback data from /student_feedback_counts endpoint.")
9
+
10
+ # Define strands
11
+ strands = ["STEM", "ABM", "HUMSS", "GAS", "TVL"]
12
+
13
+ # Define common hobbies
14
+ hobbies_list = [
15
+ "Programming", "Reading", "Sports", "Music", "Art", "Gaming",
16
+ "Photography", "Writing", "Dancing", "Cooking", "Traveling",
17
+ "Mathematics", "Science", "History", "Literature", "Technology"
18
+ ]
19
+
20
+ # Generate synthetic data
21
+ np.random.seed(42) # For reproducible results
22
+ n_samples = 1000
23
+
24
+ data = []
25
+ for _ in range(n_samples):
26
+ # Generate random but realistic data
27
+ stanine = np.random.randint(1, 10)
28
+ gwa = np.random.uniform(75, 100) # GWA between 75-100
29
+ strand = np.random.choice(strands)
30
+ course = np.random.choice(courses)
31
+ hobbies = np.random.choice(hobbies_list, size=np.random.randint(1, 4), replace=False)
32
+ hobbies_str = ", ".join(hobbies)
33
+
34
+ # Generate rating based on some logic
35
+ if stanine >= 7 and gwa >= 85:
36
+ rating = np.random.choice([4, 5], p=[0.3, 0.7])
37
+ elif stanine >= 5 and gwa >= 80:
38
+ rating = np.random.choice([3, 4, 5], p=[0.2, 0.5, 0.3])
39
+ else:
40
+ rating = np.random.choice([1, 2, 3, 4], p=[0.1, 0.3, 0.4, 0.2])
41
+
42
+ count = np.random.randint(1, 10)
43
+
44
+ data.append({
45
+ 'course': course,
46
+ 'stanine': stanine,
47
+ 'gwa': gwa,
48
+ 'strand': strand,
49
+ 'rating': rating,
50
+ 'hobbies': hobbies_str,
51
+ 'count': count
52
+ })
53
+
54
+ return pd.DataFrame(data)
55
+
56
+ def save_basic_data():
57
+ """Save basic training data to CSV"""
58
+ df = create_basic_training_data()
59
+ df.to_csv('basic_training_data.csv', index=False)
60
+ print(f"Basic training data saved with {len(df)} samples")
61
+ return df
62
+
63
+ if __name__ == "__main__":
64
+ save_basic_data()