File size: 15,243 Bytes
dc19554 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 |
import csv
import random
# ============================
# CONFIGURATION
# ============================
# Path of the CSV file the generator writes.
OUTPUT_FILE = "rosetta_code_dataset.csv"
# Number of rows generated per algorithm entry (high variation count for
# better matching).
SAMPLES_PER_ALGO = 500
# ============================
# 1. THE ULTIMATE ALGORITHM LIBRARY
# ============================
# Template library used by the dataset generator.
#
# Each key names one algorithm. Its value is a dict with:
#   "prompts" - base natural-language phrasings for the task
#   "python"  - code template emitted for Python requests
#   "cpp"     - code template emitted for C++ requests
#   "java"    - code template emitted for Java requests
#
# The template strings are written to the CSV verbatim, so their text is
# data: whitespace and escapes inside them matter. Python templates use one
# extra space per nesting level and must stay compilable.
ALGORITHMS = {
    # ---------------------------
    # BASIC MATH & LOGIC
    # ---------------------------
    "factorial": {
        "prompts": ["factorial of a number", "calculate n!", "multiplication of 1 to n", "find factorial", "fact code"],
        "python": "def factorial(n):\n return 1 if n == 0 else n * factorial(n-1)\nnum = int(input())\nprint(factorial(num))",
        "cpp": "#include<iostream>\nusing namespace std;\nint factorial(int n) {\n return (n == 0) ? 1 : n * factorial(n - 1);\n}\nint main() {\n int n; cin>>n;\n cout << factorial(n);\n}",
        "java": "import java.util.Scanner;\nclass Main {\n static int factorial(int n) {\n return (n == 0) ? 1 : n * factorial(n - 1);\n }\n public static void main(String[] args) {\n Scanner sc = new Scanner(System.in);\n System.out.println(factorial(sc.nextInt()));\n }\n}"
    },
    "fibonacci": {
        "prompts": ["fibonacci series", "print fib numbers", "sequence 0 1 1 2 3", "fib series", "fibonacci recursion"],
        "python": "n = int(input())\na, b = 0, 1\nfor _ in range(n):\n print(a, end=' ')\n a, b = b, a+b",
        "cpp": "int n, a=0, b=1, next;\ncin >> n;\nfor (int i = 0; i < n; i++) {\n cout << a << \" \";\n next = a + b;\n a = b;\n b = next;\n}",
        "java": "int n = 10, a = 0, b = 1;\nfor (int i = 0; i < n; i++) {\n System.out.print(a + \" \");\n int next = a + b;\n a = b;\n b = next;\n}"
    },
    "swap_two_numbers": {
        "prompts": ["swap two numbers", "swap variables without temp", "exchange values", "swap logic"],
        "python": "a = int(input())\nb = int(input())\na, b = b, a\nprint(a, b)",
        "cpp": "int a, b;\ncin >> a >> b;\na = a + b;\nb = a - b;\na = a - b;\ncout << a << \" \" << b;",
        "java": "int a = 5, b = 10;\na = a + b;\nb = a - b;\na = a - b;\nSystem.out.println(a + \" \" + b);"
    },
    "leap_year": {
        "prompts": ["check leap year", "is year leap", "leap year logic", "days in february year"],
        "python": "year = int(input())\nif (year % 4 == 0 and year % 100 != 0) or (year % 400 == 0):\n print('Leap Year')\nelse:\n print('Not Leap Year')",
        "cpp": "int year;\ncin >> year;\nif ((year % 4 == 0 && year % 100 != 0) || (year % 400 == 0))\n cout << \"Leap Year\";\nelse\n cout << \"Not Leap Year\";",
        "java": "int year = 2024;\nif ((year % 4 == 0 && year % 100 != 0) || (year % 400 == 0))\n System.out.println(\"Leap Year\");\nelse\n System.out.println(\"Not Leap Year\");"
    },
    "odd_even": {
        "prompts": ["check odd even", "is number divisible by 2", "find even number", "odd number logic"],
        "python": "num = int(input())\nif num % 2 == 0: print('Even')\nelse: print('Odd')",
        "cpp": "int n; cin >> n;\nif(n % 2 == 0) cout << \"Even\";\nelse cout << \"Odd\";",
        "java": "int n = 5;\nif(n % 2 == 0) System.out.println(\"Even\");\nelse System.out.println(\"Odd\");"
    },
    "lcm_hcf": {
        "prompts": ["lcm and hcf", "gcd of two numbers", "least common multiple", "highest common factor"],
        "python": "import math\na, b = 12, 15\ngcd = math.gcd(a, b)\nlcm = (a*b)//gcd\nprint('HCF:', gcd, 'LCM:', lcm)",
        # The HCF line ends with endl so the two results are not glued together.
        "cpp": "int gcd(int a, int b) { return b==0?a:gcd(b, a%b); }\nint main() {\n int a=12, b=15;\n cout<<\"HCF: \"<<gcd(a,b)<<endl;\n cout<<\"LCM: \"<<(a*b)/gcd(a,b);\n}",
        "java": "static int gcd(int a, int b) { return b==0?a:gcd(b, a%b); }\npublic static void main(String[] args) {\n int a=12, b=15;\n System.out.println(\"HCF: \"+gcd(a,b));\n System.out.println(\"LCM: \"+(a*b)/gcd(a,b));\n}"
    },
    # ---------------------------
    # NUMBER THEORY
    # ---------------------------
    "prime_check": {
        "prompts": ["check prime number", "is prime or not", "prime no program", "find if number is prime"],
        # The inner `if` sits one space deeper than the `for`; the following
        # `else` pairs with the `for` (for/else) and runs when no divisor is found.
        "python": "num = int(input())\nif num > 1:\n for i in range(2, int(num**0.5)+1):\n  if (num % i) == 0: print('Not Prime'); break\n else: print('Prime')\nelse: print('Not Prime')",
        "cpp": "bool isPrime(int n) {\n if (n <= 1) return false;\n for (int i = 2; i * i <= n; i++)\n if (n % i == 0) return false;\n return true;\n}",
        "java": "boolean isPrime(int n) {\n if (n <= 1) return false;\n for (int i = 2; i * i <= n; i++)\n if (n % i == 0) return false;\n return true;\n}"
    },
    "armstrong": {
        "prompts": ["armstrong number", "sum of cubes of digits", "check armstrong", "narcissistic number"],
        "python": "n = int(input())\nsum = 0\ntemp = n\nwhile temp > 0:\n digit = temp % 10\n sum += digit ** 3\n temp //= 10\nif n == sum: print('Armstrong')\nelse: print('Not Armstrong')",
        "cpp": "int n, r, sum=0, temp;\ncin >> n;\ntemp = n;\nwhile(n>0){r=n%10;sum=sum+(r*r*r);n=n/10;}\nif(temp==sum) cout<<\"Armstrong\";\nelse cout<<\"Not\";",
        "java": "int n=153, r, sum=0, temp;\ntemp = n;\nwhile(n>0){r=n%10;sum=sum+(r*r*r);n=n/10;}\nif(temp==sum) System.out.println(\"Armstrong\");\nelse System.out.println(\"Not\");"
    },
    "palindrome_number": {
        "prompts": ["palindrome number", "reverse number equal", "check number palindrome"],
        "python": "n = input()\nif n == n[::-1]: print('Palindrome')\nelse: print('Not Palindrome')",
        "cpp": "int n, r, sum=0, temp;\ncin >> n;\ntemp = n;\nwhile(n>0){r=n%10;sum=(sum*10)+r;n=n/10;}\nif(temp==sum) cout<<\"Palindrome\";\nelse cout<<\"Not\";",
        "java": "int n=121, r, sum=0, temp;\ntemp = n;\nwhile(n>0){r=n%10;sum=(sum*10)+r;n=n/10;}\nif(temp==sum) System.out.println(\"Palindrome\");\nelse System.out.println(\"Not\");"
    },
    "sum_of_digits": {
        "prompts": ["sum of digits", "add all digits of number", "digit sum logic"],
        "python": "n = int(input())\ns = 0\nwhile n > 0:\n s += n % 10\n n //= 10\nprint(s)",
        "cpp": "int n, sum=0;\ncin >> n;\nwhile(n>0) { sum += n%10; n/=10; }\ncout << sum;",
        "java": "int n=123, sum=0;\nwhile(n>0) { sum += n%10; n/=10; }\nSystem.out.println(sum);"
    },
    "decimal_to_binary": {
        "prompts": ["decimal to binary", "convert dec to bin", "binary of number"],
        "python": "n = int(input())\nprint(bin(n).replace('0b', ''))",
        "cpp": "void decToBinary(int n) {\n int binaryNum[32];\n int i = 0;\n while (n > 0) {\n binaryNum[i] = n % 2;\n n = n / 2;\n i++;\n }\n for (int j = i - 1; j >= 0; j--) cout << binaryNum[j];\n}",
        "java": "void decToBinary(int n) {\n System.out.println(Integer.toBinaryString(n));\n}"
    },
    # ---------------------------
    # ARRAYS & MATRICES
    # ---------------------------
    "bubble_sort": {
        "prompts": ["bubble sort", "sort array ascending", "sorting algorithm", "arrange elements"],
        # The swap line is nested one space deeper than the inner `for` so
        # the snippet is valid Python.
        "python": "arr = [64, 34, 25, 12, 22, 11, 90]\nn = len(arr)\nfor i in range(n):\n for j in range(0, n-i-1):\n  if arr[j] > arr[j+1]: arr[j], arr[j+1] = arr[j+1], arr[j]\nprint(arr)",
        "cpp": "void bubbleSort(int arr[], int n) {\n for (int i = 0; i < n-1; i++)\n for (int j = 0; j < n-i-1; j++)\n if (arr[j] > arr[j+1]) swap(arr[j], arr[j+1]);\n}",
        "java": "void bubbleSort(int arr[]) {\n int n = arr.length;\n for (int i = 0; i < n-1; i++)\n for (int j = 0; j < n-i-1; j++)\n if (arr[j] > arr[j+1]) {\n int temp = arr[j]; arr[j] = arr[j+1]; arr[j+1] = temp;\n }\n}"
    },
    "linear_search": {
        "prompts": ["linear search", "find element in array", "search number list"],
        "python": "arr = [10, 20, 30, 40]\nx = 30\nif x in arr: print('Found')\nelse: print('Not Found')",
        "cpp": "int search(int arr[], int n, int x) {\n for (int i = 0; i < n; i++)\n if (arr[i] == x) return i;\n return -1;\n}",
        "java": "int search(int arr[], int x) {\n for (int i = 0; i < arr.length; i++)\n if (arr[i] == x) return i;\n return -1;\n}"
    },
    "largest_in_array": {
        "prompts": ["largest element in array", "max in array", "find biggest number in list"],
        "python": "arr = [10, 324, 45, 90, 9808]\nprint(max(arr))",
        "cpp": "int largest(int arr[], int n) {\n int max = arr[0];\n for (int i = 1; i < n; i++)\n if (arr[i] > max) max = arr[i];\n return max;\n}",
        "java": "int largest(int arr[]) {\n int max = arr[0];\n for (int i = 1; i < arr.length; i++)\n if (arr[i] > max) max = arr[i];\n return max;\n}"
    },
    "matrix_add": {
        "prompts": ["matrix addition", "add two matrices", "sum of matrix"],
        "python": "X = [[1,2,3], [4 ,5,6], [7 ,8,9]]\nY = [[9,8,7], [6,5,4], [3,2,1]]\nresult = [[X[i][j] + Y[i][j] for j in range(len(X[0]))] for i in range(len(X))]\nfor r in result: print(r)",
        "cpp": "void addMatrix(int A[3][3], int B[3][3]) {\n for(int i=0;i<3;i++) {\n for(int j=0;j<3;j++) cout<<A[i][j]+B[i][j]<<\" \";\n cout<<endl;\n }\n}",
        "java": "void addMatrix(int A[][], int B[][]) {\n for(int i=0;i<3;i++) {\n for(int j=0;j<3;j++) System.out.print(A[i][j]+B[i][j]+\" \");\n System.out.println();\n }\n}"
    },
    "matrix_transpose": {
        "prompts": ["matrix transpose", "transpose of matrix", "swap rows and columns"],
        "python": "X = [[12,7], [4 ,5], [3 ,8]]\nresult = [[X[j][i] for j in range(len(X))] for i in range(len(X[0]))]\nfor r in result: print(r)",
        "cpp": "void transpose(int A[3][3]) {\n for(int i=0;i<3;i++) {\n for(int j=0;j<3;j++) cout<<A[j][i]<<\" \";\n cout<<endl;\n }\n}",
        "java": "void transpose(int A[][]) {\n for(int i=0;i<3;i++) {\n for(int j=0;j<3;j++) System.out.print(A[j][i]+\" \");\n System.out.println();\n }\n}"
    },
    # ---------------------------
    # STRINGS
    # ---------------------------
    "string_palindrome": {
        "prompts": ["string palindrome", "check word palindrome", "reverse string equal"],
        "python": "s = input()\nif s == s[::-1]: print('Palindrome')\nelse: print('Not Palindrome')",
        "cpp": "string s; cin >> s;\nstring rev = string(s.rbegin(), s.rend());\nif (s == rev) cout << \"Palindrome\";\nelse cout << \"Not\";",
        # The else branch reports the negative case, matching the C++ template.
        "java": "String str = \"madam\", rev = \"\";\nfor (int i = str.length() - 1; i >= 0; i--) rev = rev + str.charAt(i);\nif (str.equals(rev)) System.out.println(\"Palindrome\");\nelse System.out.println(\"Not\");"
    },
    "vowel_count": {
        "prompts": ["count vowels", "number of vowels in string", "vowel consonant count"],
        "python": "s = input().lower()\ncount = 0\nfor char in s:\n if char in 'aeiou': count += 1\nprint(count)",
        "cpp": "string s; cin >> s;\nint count = 0;\nfor(char c : s) {\n if(c=='a'||c=='e'||c=='i'||c=='o'||c=='u') count++;\n}\ncout << count;",
        # The final println outputs the computed count, as the Python and C++ templates do.
        "java": "String s = \"hello\";\nint count = 0;\nfor(int i=0; i<s.length(); i++) {\n char c = s.charAt(i);\n if(c=='a'||c=='e'||c=='i'||c=='o'||c=='u') count++;\n}\nSystem.out.println(count);"
    },
    # ---------------------------
    # PATTERNS (EXAM FAVORITES)
    # ---------------------------
    "star_pyramid": {
        "prompts": ["star pyramid", "triangle star pattern", "print pyramid"],
        "python": "n = 5\nfor i in range(n):\n print(' '*(n-i-1) + '* '*(i+1))",
        "cpp": "int n=5;\nfor(int i=1; i<=n; i++) {\n for(int j=1; j<=n-i; j++) cout<<\" \";\n for(int j=1; j<=i; j++) cout<<\"* \";\n cout<<endl;\n}",
        "java": "int n=5;\nfor(int i=1; i<=n; i++) {\n for(int j=1; j<=n-i; j++) System.out.print(\" \");\n for(int j=1; j<=i; j++) System.out.print(\"* \");\n System.out.println();\n}"
    },
    "right_triangle": {
        "prompts": ["right angle triangle", "star pattern right", "simple star pattern"],
        "python": "n=5\nfor i in range(1, n+1):\n print('* ' * i)",
        "cpp": "for(int i=1; i<=5; i++){\n for(int j=1; j<=i; j++) cout<<\"* \";\n cout<<endl;\n}",
        "java": "for(int i=1; i<=5; i++){\n for(int j=1; j<=i; j++) System.out.print(\"* \");\n System.out.println();\n}"
    },
    # ---------------------------
    # UTILITY
    # ---------------------------
    "calculator": {
        "prompts": ["simple calculator", "add sub mul div", "switch case calculator", "calc program"],
        "python": "def calc(a, b, op):\n if op == '+': return a + b\n elif op == '-': return a - b\n elif op == '*': return a * b\n elif op == '/': return a / b",
        "cpp": "switch(op) {\n case '+': cout << a+b; break;\n case '-': cout << a-b; break;\n case '*': cout << a*b; break;\n case '/': cout << a/b; break;\n}",
        "java": "switch(op) {\n case '+': System.out.println(a+b); break;\n case '-': System.out.println(a-b); break;\n case '*': System.out.println(a*b); break;\n case '/': System.out.println(a/b); break;\n}"
    },
    "hello_world": {
        "prompts": ["hello world", "print hello", "basic program", "test code"],
        "python": "print(\"Hello World\")",
        "cpp": "#include <iostream>\nusing namespace std;\nint main() {\n cout << \"Hello World\";\n return 0;\n}",
        "java": "public class Main {\n public static void main(String[] args) {\n System.out.println(\"Hello World\");\n }\n}"
    }
}
# ============================
# 2. GENERATOR LOGIC
# ============================
def generate_dataset(output_file=None, samples_per_algo=None, seed=None,
                     algorithms=None):
    """Build a synthetic prompt -> code dataset and write it to a CSV file.

    For every algorithm entry, ``samples_per_algo`` rows are produced. Each
    row pairs a randomly phrased query (base prompt + language + sentence
    pattern) with that algorithm's code template for the chosen language.

    Args:
        output_file: Path of the CSV to write. Defaults to ``OUTPUT_FILE``.
        samples_per_algo: Rows to generate per algorithm. Defaults to
            ``SAMPLES_PER_ALGO``.
        seed: Optional seed. When given, a private ``random.Random(seed)``
            is used so the output is reproducible; when ``None`` the
            module-level ``random`` functions are used, as before.
        algorithms: Mapping shaped like ``ALGORITHMS`` (each value holds
            "prompts", "python", "cpp" and "java"). Defaults to
            ``ALGORITHMS``.

    Returns:
        The generated rows as a list of ``[prompt, language, code]`` lists,
        in the order they were written (header row excluded).
    """
    # Resolve defaults at call time so module constants can be overridden.
    if output_file is None:
        output_file = OUTPUT_FILE
    if samples_per_algo is None:
        samples_per_algo = SAMPLES_PER_ALGO
    if algorithms is None:
        algorithms = ALGORITHMS
    rng = random if seed is None else random.Random(seed)

    print("Generating THE ULTIMATE Rosetta Stone Dataset...")
    languages = ("python", "cpp", "java")
    data = []
    for templates in algorithms.values():
        base_prompts = templates["prompts"]
        for _ in range(samples_per_algo):
            # 1. Randomize prompt and target language.
            prompt_base = rng.choice(base_prompts)
            lang = rng.choice(languages)
            # 2. Wrap them in one of several natural-language phrasings.
            variations = (
                f"{prompt_base} in {lang}",
                f"write {lang} code for {prompt_base}",
                f"how to {prompt_base} using {lang}",
                f"program for {prompt_base} in {lang}",
                f"give me {prompt_base} code {lang}",
            )
            query = rng.choice(variations)
            # 3. Pair the query with the matching code template.
            data.append([query, lang, templates[lang]])

    # newline='' lets the csv module control line endings, which matters
    # because the code cells contain embedded newlines.
    with open(output_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(["prompt", "language", "code"])
        writer.writerows(data)

    print(f"✅ Created {len(data)} training samples covering {len(algorithms)} major topics.")
    print(f"Saved to {output_file}")
    return data
# Generate the dataset only when run as a script, not when imported.
if __name__ == "__main__":
    generate_dataset()