|
|
import csv
|
|
|
import random
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Path of the CSV file that generate_dataset() writes.
OUTPUT_FILE = "rosetta_code_dataset.csv"

# Number of rows generated for each entry in ALGORITHMS.
SAMPLES_PER_ALGO = 500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Template table used by generate_dataset().
#
# Each key names an algorithm; each value is a dict with:
#   "prompts" - list of base phrasings describing the task
#   "python"  - code template returned when the chosen language is "python"
#   "cpp"     - code template returned when the chosen language is "cpp"
#   "java"    - code template returned when the chosen language is "java"
#
# The Python templates indent one space per nesting level. Nested blocks must
# therefore be indented strictly deeper than their parent statement, otherwise
# the snippet is not valid Python (and a `for ... else` would silently turn
# into an `if ... else`).
ALGORITHMS = {
    # --- Basic math ---
    "factorial": {
        "prompts": ["factorial of a number", "calculate n!", "multiplication of 1 to n", "find factorial", "fact code"],
        "python": "def factorial(n):\n return 1 if n == 0 else n * factorial(n-1)\nnum = int(input())\nprint(factorial(num))",
        "cpp": "#include<iostream>\nusing namespace std;\nint factorial(int n) {\n return (n == 0) ? 1 : n * factorial(n - 1);\n}\nint main() {\n int n; cin>>n;\n cout << factorial(n);\n}",
        "java": "import java.util.Scanner;\nclass Main {\n static int factorial(int n) {\n return (n == 0) ? 1 : n * factorial(n - 1);\n }\n public static void main(String[] args) {\n Scanner sc = new Scanner(System.in);\n System.out.println(factorial(sc.nextInt()));\n }\n}",
    },
    "fibonacci": {
        "prompts": ["fibonacci series", "print fib numbers", "sequence 0 1 1 2 3", "fib series", "fibonacci recursion"],
        "python": "n = int(input())\na, b = 0, 1\nfor _ in range(n):\n print(a, end=' ')\n a, b = b, a+b",
        "cpp": "int n, a=0, b=1, next;\ncin >> n;\nfor (int i = 0; i < n; i++) {\n cout << a << \" \";\n next = a + b;\n a = b;\n b = next;\n}",
        "java": "int n = 10, a = 0, b = 1;\nfor (int i = 0; i < n; i++) {\n System.out.print(a + \" \");\n int next = a + b;\n a = b;\n b = next;\n}",
    },
    "swap_two_numbers": {
        "prompts": ["swap two numbers", "swap variables without temp", "exchange values", "swap logic"],
        "python": "a = int(input())\nb = int(input())\na, b = b, a\nprint(a, b)",
        "cpp": "int a, b;\ncin >> a >> b;\na = a + b;\nb = a - b;\na = a - b;\ncout << a << \" \" << b;",
        "java": "int a = 5, b = 10;\na = a + b;\nb = a - b;\na = a - b;\nSystem.out.println(a + \" \" + b);",
    },
    "leap_year": {
        "prompts": ["check leap year", "is year leap", "leap year logic", "days in february year"],
        "python": "year = int(input())\nif (year % 4 == 0 and year % 100 != 0) or (year % 400 == 0):\n print('Leap Year')\nelse:\n print('Not Leap Year')",
        "cpp": "int year;\ncin >> year;\nif ((year % 4 == 0 && year % 100 != 0) || (year % 400 == 0))\n cout << \"Leap Year\";\nelse\n cout << \"Not Leap Year\";",
        "java": "int year = 2024;\nif ((year % 4 == 0 && year % 100 != 0) || (year % 400 == 0))\n System.out.println(\"Leap Year\");\nelse\n System.out.println(\"Not Leap Year\");",
    },
    "odd_even": {
        "prompts": ["check odd even", "is number divisible by 2", "find even number", "odd number logic"],
        "python": "num = int(input())\nif num % 2 == 0: print('Even')\nelse: print('Odd')",
        "cpp": "int n; cin >> n;\nif(n % 2 == 0) cout << \"Even\";\nelse cout << \"Odd\";",
        "java": "int n = 5;\nif(n % 2 == 0) System.out.println(\"Even\");\nelse System.out.println(\"Odd\");",
    },
    "lcm_hcf": {
        "prompts": ["lcm and hcf", "gcd of two numbers", "least common multiple", "highest common factor"],
        "python": "import math\na, b = 12, 15\ngcd = math.gcd(a, b)\nlcm = (a*b)//gcd\nprint('HCF:', gcd, 'LCM:', lcm)",
        "cpp": "int gcd(int a, int b) { return b==0?a:gcd(b, a%b); }\nint main() {\n int a=12, b=15;\n cout<<\"HCF: \"<<gcd(a,b);\n cout<<\"LCM: \"<<(a*b)/gcd(a,b);\n}",
        "java": "static int gcd(int a, int b) { return b==0?a:gcd(b, a%b); }\npublic static void main(String[] args) {\n int a=12, b=15;\n System.out.println(\"HCF: \"+gcd(a,b));\n System.out.println(\"LCM: \"+(a*b)/gcd(a,b));\n}",
    },

    # --- Number properties and conversions ---
    "prime_check": {
        "prompts": ["check prime number", "is prime or not", "prime no program", "find if number is prime"],
        # The inner `if` is nested one level below the `for` so that the
        # following `else` belongs to the loop (runs only when no divisor
        # triggered `break`).
        "python": "num = int(input())\nif num > 1:\n for i in range(2, int(num**0.5)+1):\n  if (num % i) == 0: print('Not Prime'); break\n else: print('Prime')\nelse: print('Not Prime')",
        "cpp": "bool isPrime(int n) {\n if (n <= 1) return false;\n for (int i = 2; i * i <= n; i++)\n if (n % i == 0) return false;\n return true;\n}",
        "java": "boolean isPrime(int n) {\n if (n <= 1) return false;\n for (int i = 2; i * i <= n; i++)\n if (n % i == 0) return false;\n return true;\n}",
    },
    "armstrong": {
        "prompts": ["armstrong number", "sum of cubes of digits", "check armstrong", "narcissistic number"],
        "python": "n = int(input())\nsum = 0\ntemp = n\nwhile temp > 0:\n digit = temp % 10\n sum += digit ** 3\n temp //= 10\nif n == sum: print('Armstrong')\nelse: print('Not Armstrong')",
        "cpp": "int n, r, sum=0, temp;\ncin >> n;\ntemp = n;\nwhile(n>0){r=n%10;sum=sum+(r*r*r);n=n/10;}\nif(temp==sum) cout<<\"Armstrong\";\nelse cout<<\"Not\";",
        "java": "int n=153, r, sum=0, temp;\ntemp = n;\nwhile(n>0){r=n%10;sum=sum+(r*r*r);n=n/10;}\nif(temp==sum) System.out.println(\"Armstrong\");\nelse System.out.println(\"Not\");",
    },
    "palindrome_number": {
        "prompts": ["palindrome number", "reverse number equal", "check number palindrome"],
        "python": "n = input()\nif n == n[::-1]: print('Palindrome')\nelse: print('Not Palindrome')",
        "cpp": "int n, r, sum=0, temp;\ncin >> n;\ntemp = n;\nwhile(n>0){r=n%10;sum=(sum*10)+r;n=n/10;}\nif(temp==sum) cout<<\"Palindrome\";\nelse cout<<\"Not\";",
        "java": "int n=121, r, sum=0, temp;\ntemp = n;\nwhile(n>0){r=n%10;sum=(sum*10)+r;n=n/10;}\nif(temp==sum) System.out.println(\"Palindrome\");\nelse System.out.println(\"Not\");",
    },
    "sum_of_digits": {
        "prompts": ["sum of digits", "add all digits of number", "digit sum logic"],
        "python": "n = int(input())\ns = 0\nwhile n > 0:\n s += n % 10\n n //= 10\nprint(s)",
        "cpp": "int n, sum=0;\ncin >> n;\nwhile(n>0) { sum += n%10; n/=10; }\ncout << sum;",
        "java": "int n=123, sum=0;\nwhile(n>0) { sum += n%10; n/=10; }\nSystem.out.println(sum);",
    },
    "decimal_to_binary": {
        "prompts": ["decimal to binary", "convert dec to bin", "binary of number"],
        "python": "n = int(input())\nprint(bin(n).replace('0b', ''))",
        "cpp": "void decToBinary(int n) {\n int binaryNum[32];\n int i = 0;\n while (n > 0) {\n binaryNum[i] = n % 2;\n n = n / 2;\n i++;\n }\n for (int j = i - 1; j >= 0; j--) cout << binaryNum[j];\n}",
        "java": "void decToBinary(int n) {\n System.out.println(Integer.toBinaryString(n));\n}",
    },

    # --- Arrays and matrices ---
    "bubble_sort": {
        "prompts": ["bubble sort", "sort array ascending", "sorting algorithm", "arrange elements"],
        # The comparison is nested one level below the inner `for`; at the
        # same indent the inner loop would have no body.
        "python": "arr = [64, 34, 25, 12, 22, 11, 90]\nn = len(arr)\nfor i in range(n):\n for j in range(0, n-i-1):\n  if arr[j] > arr[j+1]: arr[j], arr[j+1] = arr[j+1], arr[j]\nprint(arr)",
        "cpp": "void bubbleSort(int arr[], int n) {\n for (int i = 0; i < n-1; i++)\n for (int j = 0; j < n-i-1; j++)\n if (arr[j] > arr[j+1]) swap(arr[j], arr[j+1]);\n}",
        "java": "void bubbleSort(int arr[]) {\n int n = arr.length;\n for (int i = 0; i < n-1; i++)\n for (int j = 0; j < n-i-1; j++)\n if (arr[j] > arr[j+1]) {\n int temp = arr[j]; arr[j] = arr[j+1]; arr[j+1] = temp;\n }\n}",
    },
    "linear_search": {
        "prompts": ["linear search", "find element in array", "search number list"],
        "python": "arr = [10, 20, 30, 40]\nx = 30\nif x in arr: print('Found')\nelse: print('Not Found')",
        "cpp": "int search(int arr[], int n, int x) {\n for (int i = 0; i < n; i++)\n if (arr[i] == x) return i;\n return -1;\n}",
        "java": "int search(int arr[], int x) {\n for (int i = 0; i < arr.length; i++)\n if (arr[i] == x) return i;\n return -1;\n}",
    },
    "largest_in_array": {
        "prompts": ["largest element in array", "max in array", "find biggest number in list"],
        "python": "arr = [10, 324, 45, 90, 9808]\nprint(max(arr))",
        "cpp": "int largest(int arr[], int n) {\n int max = arr[0];\n for (int i = 1; i < n; i++)\n if (arr[i] > max) max = arr[i];\n return max;\n}",
        "java": "int largest(int arr[]) {\n int max = arr[0];\n for (int i = 1; i < arr.length; i++)\n if (arr[i] > max) max = arr[i];\n return max;\n}",
    },
    "matrix_add": {
        "prompts": ["matrix addition", "add two matrices", "sum of matrix"],
        "python": "X = [[1,2,3], [4 ,5,6], [7 ,8,9]]\nY = [[9,8,7], [6,5,4], [3,2,1]]\nresult = [[X[i][j] + Y[i][j] for j in range(len(X[0]))] for i in range(len(X))]\nfor r in result: print(r)",
        "cpp": "void addMatrix(int A[3][3], int B[3][3]) {\n for(int i=0;i<3;i++) {\n for(int j=0;j<3;j++) cout<<A[i][j]+B[i][j]<<\" \";\n cout<<endl;\n }\n}",
        "java": "void addMatrix(int A[][], int B[][]) {\n for(int i=0;i<3;i++) {\n for(int j=0;j<3;j++) System.out.print(A[i][j]+B[i][j]+\" \");\n System.out.println();\n }\n}",
    },
    "matrix_transpose": {
        "prompts": ["matrix transpose", "transpose of matrix", "swap rows and columns"],
        "python": "X = [[12,7], [4 ,5], [3 ,8]]\nresult = [[X[j][i] for j in range(len(X))] for i in range(len(X[0]))]\nfor r in result: print(r)",
        "cpp": "void transpose(int A[3][3]) {\n for(int i=0;i<3;i++) {\n for(int j=0;j<3;j++) cout<<A[j][i]<<\" \";\n cout<<endl;\n }\n}",
        "java": "void transpose(int A[][]) {\n for(int i=0;i<3;i++) {\n for(int j=0;j<3;j++) System.out.print(A[j][i]+\" \");\n System.out.println();\n }\n}",
    },

    # --- Strings ---
    "string_palindrome": {
        "prompts": ["string palindrome", "check word palindrome", "reverse string equal"],
        "python": "s = input()\nif s == s[::-1]: print('Palindrome')\nelse: print('Not Palindrome')",
        "cpp": "string s; cin >> s;\nstring rev = string(s.rbegin(), s.rend());\nif (s == rev) cout << \"Palindrome\";\nelse cout << \"Not\";",
        "java": "String str = \"madam\", rev = \"\";\nfor (int i = str.length() - 1; i >= 0; i--) rev = rev + str.charAt(i);\nif (str.equals(rev)) System.out.println(\"Palindrome\");",
    },
    "vowel_count": {
        "prompts": ["count vowels", "number of vowels in string", "vowel consonant count"],
        "python": "s = input().lower()\ncount = 0\nfor char in s:\n if char in 'aeiou': count += 1\nprint(count)",
        "cpp": "string s; cin >> s;\nint count = 0;\nfor(char c : s) {\n if(c=='a'||c=='e'||c=='i'||c=='o'||c=='u') count++;\n}\ncout << count;",
        "java": "String s = \"hello\";\nint count = 0;\nfor(int i=0; i<s.length(); i++) {\n char c = s.charAt(i);\n if(c=='a'||c=='e'||c=='i'||c=='o'||c=='u') count++;\n}",
    },

    # --- Star patterns ---
    "star_pyramid": {
        "prompts": ["star pyramid", "triangle star pattern", "print pyramid"],
        "python": "n = 5\nfor i in range(n):\n print(' '*(n-i-1) + '* '*(i+1))",
        "cpp": "int n=5;\nfor(int i=1; i<=n; i++) {\n for(int j=1; j<=n-i; j++) cout<<\" \";\n for(int j=1; j<=i; j++) cout<<\"* \";\n cout<<endl;\n}",
        "java": "int n=5;\nfor(int i=1; i<=n; i++) {\n for(int j=1; j<=n-i; j++) System.out.print(\" \");\n for(int j=1; j<=i; j++) System.out.print(\"* \");\n System.out.println();\n}",
    },
    "right_triangle": {
        "prompts": ["right angle triangle", "star pattern right", "simple star pattern"],
        "python": "n=5\nfor i in range(1, n+1):\n print('* ' * i)",
        "cpp": "for(int i=1; i<=5; i++){\n for(int j=1; j<=i; j++) cout<<\"* \";\n cout<<endl;\n}",
        "java": "for(int i=1; i<=5; i++){\n for(int j=1; j<=i; j++) System.out.print(\"* \");\n System.out.println();\n}",
    },

    # --- Miscellaneous ---
    "calculator": {
        "prompts": ["simple calculator", "add sub mul div", "switch case calculator", "calc program"],
        "python": "def calc(a, b, op):\n if op == '+': return a + b\n elif op == '-': return a - b\n elif op == '*': return a * b\n elif op == '/': return a / b",
        "cpp": "switch(op) {\n case '+': cout << a+b; break;\n case '-': cout << a-b; break;\n case '*': cout << a*b; break;\n case '/': cout << a/b; break;\n}",
        "java": "switch(op) {\n case '+': System.out.println(a+b); break;\n case '-': System.out.println(a-b); break;\n case '*': System.out.println(a*b); break;\n case '/': System.out.println(a/b); break;\n}",
    },
    "hello_world": {
        "prompts": ["hello world", "print hello", "basic program", "test code"],
        "python": "print(\"Hello World\")",
        "cpp": "#include <iostream>\nusing namespace std;\nint main() {\n cout << \"Hello World\";\n return 0;\n}",
        "java": "public class Main {\n public static void main(String[] args) {\n System.out.println(\"Hello World\");\n }\n}",
    },
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _build_rows(algorithms, samples_per_algo, rng):
    """Return ``[query, language, code]`` rows sampled from *algorithms*."""
    languages = ["python", "cpp", "java"]
    # Phrasings wrapped around a base prompt and a language name; built once
    # instead of re-creating five formatted strings for every sample.
    query_patterns = [
        "{prompt_base} in {lang}",
        "write {lang} code for {prompt_base}",
        "how to {prompt_base} using {lang}",
        "program for {prompt_base} in {lang}",
        "give me {prompt_base} code {lang}",
    ]

    rows = []
    for templates in algorithms.values():
        base_prompts = templates["prompts"]
        for _ in range(samples_per_algo):
            # Draw order (prompt, language, phrasing) is fixed so that a
            # seeded run always yields the same rows.
            prompt_base = rng.choice(base_prompts)
            lang = rng.choice(languages)
            pattern = rng.choice(query_patterns)
            query = pattern.format(prompt_base=prompt_base, lang=lang)
            rows.append([query, lang, templates[lang]])
    return rows


def generate_dataset(output_file=None, samples_per_algo=None, algorithms=None, seed=None):
    """Write a synthetic prompt -> code dataset to a CSV file.

    For every algorithm entry, ``samples_per_algo`` rows are generated. Each
    row combines a randomly chosen base prompt, target language and phrasing
    into a query, and pairs it with that algorithm's code template for the
    chosen language. The CSV has the header ``prompt, language, code``.

    Args:
        output_file: Destination CSV path. Defaults to ``OUTPUT_FILE``.
        samples_per_algo: Rows generated per algorithm. Defaults to
            ``SAMPLES_PER_ALGO``.
        algorithms: Mapping of algorithm key to a dict holding ``"prompts"``
            plus ``"python"``, ``"cpp"`` and ``"java"`` templates. Defaults to
            ``ALGORITHMS``.
        seed: Optional seed for reproducible output. When ``None`` the
            module-level ``random`` generator is used, so a prior
            ``random.seed()`` call by the caller is still honoured.

    Returns:
        None. The dataset is written to ``output_file`` and a summary is
        printed to stdout.

    Raises:
        KeyError: If an algorithm entry lacks ``"prompts"`` or the template
            for the chosen language.
        OSError: If ``output_file`` cannot be opened for writing.
    """
    # Module-level defaults are resolved at call time so the function can be
    # driven entirely through its arguments.
    if output_file is None:
        output_file = OUTPUT_FILE
    if samples_per_algo is None:
        samples_per_algo = SAMPLES_PER_ALGO
    if algorithms is None:
        algorithms = ALGORITHMS
    rng = random if seed is None else random.Random(seed)

    print("Generating THE ULTIMATE Rosetta Stone Dataset...")
    data = _build_rows(algorithms, samples_per_algo, rng)

    # newline='' lets the csv module control line endings, which matters
    # because the code column contains embedded newlines.
    with open(output_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(["prompt", "language", "code"])
        writer.writerows(data)

    print(f"✅ Created {len(data)} training samples covering {len(algorithms)} major topics.")
    print(f"Saved to {output_file}")
|
|
|
|
|
|
# Generate the dataset only when run as a script, not when imported.
if __name__ == "__main__":
    generate_dataset()