# File size: 15,243 Bytes
# dc19554
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
import csv
import random

# ============================
# CONFIGURATION
# ============================
# Path of the CSV file that generate_dataset() writes (opened in 'w' mode, so
# an existing file at this path is overwritten).
OUTPUT_FILE = "rosetta_code_dataset.csv"
# Number of rows generated for each ALGORITHMS entry. Every row is an
# independent random draw of prompt, language and phrasing, so repeated rows
# are expected at this count.
SAMPLES_PER_ALGO = 500  # High variation count for better matching

# ============================
# 1. THE ULTIMATE ALGORITHM LIBRARY
# ============================
# Maps an algorithm identifier to its templates:
#   "prompts": list of short natural-language phrasings for the task
#   "python" / "cpp" / "java": one code snippet per language, stored as a
#       single string with "\n" line breaks
# generate_dataset() picks one prompt and one language per sample and emits
# the matching snippet verbatim, so any edit to a string below changes the
# generated dataset.
# NOTE(review): snippets are not uniform. Some are complete programs
# (e.g. "hello_world"), others are bare statements or helper functions with no
# includes / class wrapper (e.g. the "fibonacci" C++ and Java entries), and
# several Java entries hard-code an input that the Python version reads from
# stdin. Confirm whether that inconsistency is intended for the dataset.
ALGORITHMS = {
    # ---------------------------
    # BASIC MATH & LOGIC
    # ---------------------------
    "factorial": {
        "prompts": ["factorial of a number", "calculate n!", "multiplication of 1 to n", "find factorial", "fact code"],
        "python": "def factorial(n):\n    return 1 if n == 0 else n * factorial(n-1)\nnum = int(input())\nprint(factorial(num))",
        "cpp": "#include<iostream>\nusing namespace std;\nint factorial(int n) {\n    return (n == 0) ? 1 : n * factorial(n - 1);\n}\nint main() {\n    int n; cin>>n;\n    cout << factorial(n);\n}",
        "java": "import java.util.Scanner;\nclass Main {\n    static int factorial(int n) {\n        return (n == 0) ? 1 : n * factorial(n - 1);\n    }\n    public static void main(String[] args) {\n        Scanner sc = new Scanner(System.in);\n        System.out.println(factorial(sc.nextInt()));\n    }\n}"
    },
    "fibonacci": {
        "prompts": ["fibonacci series", "print fib numbers", "sequence 0 1 1 2 3", "fib series", "fibonacci recursion"],
        "python": "n = int(input())\na, b = 0, 1\nfor _ in range(n):\n    print(a, end=' ')\n    a, b = b, a+b",
        "cpp": "int n, a=0, b=1, next;\ncin >> n;\nfor (int i = 0; i < n; i++) {\n    cout << a << \" \";\n    next = a + b;\n    a = b;\n    b = next;\n}",
        "java": "int n = 10, a = 0, b = 1;\nfor (int i = 0; i < n; i++) {\n    System.out.print(a + \" \");\n    int next = a + b;\n    a = b;\n    b = next;\n}"
    },
    "swap_two_numbers": {
        "prompts": ["swap two numbers", "swap variables without temp", "exchange values", "swap logic"],
        "python": "a = int(input())\nb = int(input())\na, b = b, a\nprint(a, b)",
        "cpp": "int a, b;\ncin >> a >> b;\na = a + b;\nb = a - b;\na = a - b;\ncout << a << \" \" << b;",
        "java": "int a = 5, b = 10;\na = a + b;\nb = a - b;\na = a - b;\nSystem.out.println(a + \" \" + b);"
    },
    "leap_year": {
        "prompts": ["check leap year", "is year leap", "leap year logic", "days in february year"],
        "python": "year = int(input())\nif (year % 4 == 0 and year % 100 != 0) or (year % 400 == 0):\n    print('Leap Year')\nelse:\n    print('Not Leap Year')",
        "cpp": "int year;\ncin >> year;\nif ((year % 4 == 0 && year % 100 != 0) || (year % 400 == 0))\n    cout << \"Leap Year\";\nelse\n    cout << \"Not Leap Year\";",
        "java": "int year = 2024;\nif ((year % 4 == 0 && year % 100 != 0) || (year % 400 == 0))\n    System.out.println(\"Leap Year\");\nelse\n    System.out.println(\"Not Leap Year\");"
    },
    "odd_even": {
        "prompts": ["check odd even", "is number divisible by 2", "find even number", "odd number logic"],
        "python": "num = int(input())\nif num % 2 == 0: print('Even')\nelse: print('Odd')",
        "cpp": "int n; cin >> n;\nif(n % 2 == 0) cout << \"Even\";\nelse cout << \"Odd\";",
        "java": "int n = 5;\nif(n % 2 == 0) System.out.println(\"Even\");\nelse System.out.println(\"Odd\");"
    },
    "lcm_hcf": {
        "prompts": ["lcm and hcf", "gcd of two numbers", "least common multiple", "highest common factor"],
        "python": "import math\na, b = 12, 15\ngcd = math.gcd(a, b)\nlcm = (a*b)//gcd\nprint('HCF:', gcd, 'LCM:', lcm)",
        "cpp": "int gcd(int a, int b) { return b==0?a:gcd(b, a%b); }\nint main() {\n   int a=12, b=15;\n   cout<<\"HCF: \"<<gcd(a,b);\n   cout<<\"LCM: \"<<(a*b)/gcd(a,b);\n}",
        "java": "static int gcd(int a, int b) { return b==0?a:gcd(b, a%b); }\npublic static void main(String[] args) {\n   int a=12, b=15;\n   System.out.println(\"HCF: \"+gcd(a,b));\n   System.out.println(\"LCM: \"+(a*b)/gcd(a,b));\n}"
    },
    
    # ---------------------------
    # NUMBER THEORY
    # ---------------------------
    "prime_check": {
        "prompts": ["check prime number", "is prime or not", "prime no program", "find if number is prime"],
        "python": "num = int(input())\nif num > 1:\n    for i in range(2, int(num**0.5)+1):\n        if (num % i) == 0: print('Not Prime'); break\n    else: print('Prime')\nelse: print('Not Prime')",
        "cpp": "bool isPrime(int n) {\n    if (n <= 1) return false;\n    for (int i = 2; i * i <= n; i++)\n        if (n % i == 0) return false;\n    return true;\n}",
        "java": "boolean isPrime(int n) {\n    if (n <= 1) return false;\n    for (int i = 2; i * i <= n; i++)\n        if (n % i == 0) return false;\n    return true;\n}"
    },
    # NOTE(review): all three snippets always cube each digit, while the
    # prompts also include "narcissistic number" (usually: raise to the digit
    # count). Presumably only 3-digit inputs are intended - TODO confirm.
    "armstrong": {
        "prompts": ["armstrong number", "sum of cubes of digits", "check armstrong", "narcissistic number"],
        "python": "n = int(input())\nsum = 0\ntemp = n\nwhile temp > 0:\n   digit = temp % 10\n   sum += digit ** 3\n   temp //= 10\nif n == sum: print('Armstrong')\nelse: print('Not Armstrong')",
        "cpp": "int n, r, sum=0, temp;\ncin >> n;\ntemp = n;\nwhile(n>0){r=n%10;sum=sum+(r*r*r);n=n/10;}\nif(temp==sum) cout<<\"Armstrong\";\nelse cout<<\"Not\";",
        "java": "int n=153, r, sum=0, temp;\ntemp = n;\nwhile(n>0){r=n%10;sum=sum+(r*r*r);n=n/10;}\nif(temp==sum) System.out.println(\"Armstrong\");\nelse System.out.println(\"Not\");"
    },
    "palindrome_number": {
        "prompts": ["palindrome number", "reverse number equal", "check number palindrome"],
        "python": "n = input()\nif n == n[::-1]: print('Palindrome')\nelse: print('Not Palindrome')",
        "cpp": "int n, r, sum=0, temp;\ncin >> n;\ntemp = n;\nwhile(n>0){r=n%10;sum=(sum*10)+r;n=n/10;}\nif(temp==sum) cout<<\"Palindrome\";\nelse cout<<\"Not\";",
        "java": "int n=121, r, sum=0, temp;\ntemp = n;\nwhile(n>0){r=n%10;sum=(sum*10)+r;n=n/10;}\nif(temp==sum) System.out.println(\"Palindrome\");\nelse System.out.println(\"Not\");"
    },
    "sum_of_digits": {
        "prompts": ["sum of digits", "add all digits of number", "digit sum logic"],
        "python": "n = int(input())\ns = 0\nwhile n > 0:\n    s += n % 10\n    n //= 10\nprint(s)",
        "cpp": "int n, sum=0;\ncin >> n;\nwhile(n>0) { sum += n%10; n/=10; }\ncout << sum;",
        "java": "int n=123, sum=0;\nwhile(n>0) { sum += n%10; n/=10; }\nSystem.out.println(sum);"
    },
    "decimal_to_binary": {
        "prompts": ["decimal to binary", "convert dec to bin", "binary of number"],
        "python": "n = int(input())\nprint(bin(n).replace('0b', ''))",
        "cpp": "void decToBinary(int n) {\n    int binaryNum[32];\n    int i = 0;\n    while (n > 0) {\n        binaryNum[i] = n % 2;\n        n = n / 2;\n        i++;\n    }\n    for (int j = i - 1; j >= 0; j--) cout << binaryNum[j];\n}",
        "java": "void decToBinary(int n) {\n    System.out.println(Integer.toBinaryString(n));\n}"
    },

    # ---------------------------
    # ARRAYS & MATRICES
    # ---------------------------
    "bubble_sort": {
        "prompts": ["bubble sort", "sort array ascending", "sorting algorithm", "arrange elements"],
        "python": "arr = [64, 34, 25, 12, 22, 11, 90]\nn = len(arr)\nfor i in range(n):\n    for j in range(0, n-i-1):\n        if arr[j] > arr[j+1]: arr[j], arr[j+1] = arr[j+1], arr[j]\nprint(arr)",
        "cpp": "void bubbleSort(int arr[], int n) {\n    for (int i = 0; i < n-1; i++)\n        for (int j = 0; j < n-i-1; j++)\n            if (arr[j] > arr[j+1]) swap(arr[j], arr[j+1]);\n}",
        "java": "void bubbleSort(int arr[]) {\n    int n = arr.length;\n    for (int i = 0; i < n-1; i++)\n        for (int j = 0; j < n-i-1; j++)\n            if (arr[j] > arr[j+1]) {\n                int temp = arr[j]; arr[j] = arr[j+1]; arr[j+1] = temp;\n            }\n}"
    },
    "linear_search": {
        "prompts": ["linear search", "find element in array", "search number list"],
        "python": "arr = [10, 20, 30, 40]\nx = 30\nif x in arr: print('Found')\nelse: print('Not Found')",
        "cpp": "int search(int arr[], int n, int x) {\n    for (int i = 0; i < n; i++)\n        if (arr[i] == x) return i;\n    return -1;\n}",
        "java": "int search(int arr[], int x) {\n    for (int i = 0; i < arr.length; i++)\n        if (arr[i] == x) return i;\n    return -1;\n}"
    },
    "largest_in_array": {
        "prompts": ["largest element in array", "max in array", "find biggest number in list"],
        "python": "arr = [10, 324, 45, 90, 9808]\nprint(max(arr))",
        "cpp": "int largest(int arr[], int n) {\n    int max = arr[0];\n    for (int i = 1; i < n; i++)\n        if (arr[i] > max) max = arr[i];\n    return max;\n}",
        "java": "int largest(int arr[]) {\n    int max = arr[0];\n    for (int i = 1; i < arr.length; i++)\n        if (arr[i] > max) max = arr[i];\n    return max;\n}"
    },
    "matrix_add": {
        "prompts": ["matrix addition", "add two matrices", "sum of matrix"],
        "python": "X = [[1,2,3], [4 ,5,6], [7 ,8,9]]\nY = [[9,8,7], [6,5,4], [3,2,1]]\nresult = [[X[i][j] + Y[i][j]  for j in range(len(X[0]))] for i in range(len(X))]\nfor r in result: print(r)",
        "cpp": "void addMatrix(int A[3][3], int B[3][3]) {\n    for(int i=0;i<3;i++) {\n        for(int j=0;j<3;j++) cout<<A[i][j]+B[i][j]<<\" \";\n        cout<<endl;\n    }\n}",
        "java": "void addMatrix(int A[][], int B[][]) {\n    for(int i=0;i<3;i++) {\n        for(int j=0;j<3;j++) System.out.print(A[i][j]+B[i][j]+\" \");\n        System.out.println();\n    }\n}"
    },
    "matrix_transpose": {
        "prompts": ["matrix transpose", "transpose of matrix", "swap rows and columns"],
        "python": "X = [[12,7], [4 ,5], [3 ,8]]\nresult = [[X[j][i] for j in range(len(X))] for i in range(len(X[0]))]\nfor r in result: print(r)",
        "cpp": "void transpose(int A[3][3]) {\n    for(int i=0;i<3;i++) {\n        for(int j=0;j<3;j++) cout<<A[j][i]<<\" \";\n        cout<<endl;\n    }\n}",
        "java": "void transpose(int A[][]) {\n    for(int i=0;i<3;i++) {\n        for(int j=0;j<3;j++) System.out.print(A[j][i]+\" \");\n        System.out.println();\n    }\n}"
    },

    # ---------------------------
    # STRINGS
    # ---------------------------
    # NOTE(review): the Java snippet has no else branch, so it prints nothing
    # for a non-palindrome, unlike the Python and C++ versions.
    "string_palindrome": {
        "prompts": ["string palindrome", "check word palindrome", "reverse string equal"],
        "python": "s = input()\nif s == s[::-1]: print('Palindrome')\nelse: print('Not Palindrome')",
        "cpp": "string s; cin >> s;\nstring rev = string(s.rbegin(), s.rend());\nif (s == rev) cout << \"Palindrome\";\nelse cout << \"Not\";",
        "java": "String str = \"madam\", rev = \"\";\nfor (int i = str.length() - 1; i >= 0; i--) rev = rev + str.charAt(i);\nif (str.equals(rev)) System.out.println(\"Palindrome\");"
    },
    # NOTE(review): only the Python snippet lower-cases its input; the C++ and
    # Java versions compare against lowercase vowels only, and the Java
    # version never prints the count.
    "vowel_count": {
        "prompts": ["count vowels", "number of vowels in string", "vowel consonant count"],
        "python": "s = input().lower()\ncount = 0\nfor char in s:\n    if char in 'aeiou': count += 1\nprint(count)",
        "cpp": "string s; cin >> s;\nint count = 0;\nfor(char c : s) {\n    if(c=='a'||c=='e'||c=='i'||c=='o'||c=='u') count++;\n}\ncout << count;",
        "java": "String s = \"hello\";\nint count = 0;\nfor(int i=0; i<s.length(); i++) {\n    char c = s.charAt(i);\n    if(c=='a'||c=='e'||c=='i'||c=='o'||c=='u') count++;\n}"
    },
    
    # ---------------------------
    # PATTERNS (EXAM FAVORITES)
    # ---------------------------
    "star_pyramid": {
        "prompts": ["star pyramid", "triangle star pattern", "print pyramid"],
        "python": "n = 5\nfor i in range(n):\n    print(' '*(n-i-1) + '* '*(i+1))",
        "cpp": "int n=5;\nfor(int i=1; i<=n; i++) {\n    for(int j=1; j<=n-i; j++) cout<<\" \";\n    for(int j=1; j<=i; j++) cout<<\"* \";\n    cout<<endl;\n}",
        "java": "int n=5;\nfor(int i=1; i<=n; i++) {\n    for(int j=1; j<=n-i; j++) System.out.print(\" \");\n    for(int j=1; j<=i; j++) System.out.print(\"* \");\n    System.out.println();\n}"
    },
    "right_triangle": {
        "prompts": ["right angle triangle", "star pattern right", "simple star pattern"],
        "python": "n=5\nfor i in range(1, n+1):\n    print('* ' * i)",
        "cpp": "for(int i=1; i<=5; i++){\n    for(int j=1; j<=i; j++) cout<<\"* \";\n    cout<<endl;\n}",
        "java": "for(int i=1; i<=5; i++){\n    for(int j=1; j<=i; j++) System.out.print(\"* \");\n    System.out.println();\n}"
    },
    
    # ---------------------------
    # UTILITY
    # ---------------------------
    # NOTE(review): the C++ and Java snippets use a, b and op without
    # declaring them, and the Python snippet defines calc() but never calls it.
    "calculator": {
        "prompts": ["simple calculator", "add sub mul div", "switch case calculator", "calc program"],
        "python": "def calc(a, b, op):\n    if op == '+': return a + b\n    elif op == '-': return a - b\n    elif op == '*': return a * b\n    elif op == '/': return a / b",
        "cpp": "switch(op) {\n    case '+': cout << a+b; break;\n    case '-': cout << a-b; break;\n    case '*': cout << a*b; break;\n    case '/': cout << a/b; break;\n}",
        "java": "switch(op) {\n    case '+': System.out.println(a+b); break;\n    case '-': System.out.println(a-b); break;\n    case '*': System.out.println(a*b); break;\n    case '/': System.out.println(a/b); break;\n}"
    },
    "hello_world": {
        "prompts": ["hello world", "print hello", "basic program", "test code"],
        "python": "print(\"Hello World\")",
        "cpp": "#include <iostream>\nusing namespace std;\nint main() {\n    cout << \"Hello World\";\n    return 0;\n}",
        "java": "public class Main {\n    public static void main(String[] args) {\n        System.out.println(\"Hello World\");\n    }\n}"
    }
}

# ============================
# 2. GENERATOR LOGIC
# ============================
def generate_dataset(*, output_file=None, samples_per_algo=None, seed=None,
                     algorithms=None):
    """Write a CSV of (prompt, language, code) rows built from code templates.

    For every entry in the template table, ``samples_per_algo`` rows are
    produced. Each row independently picks one base prompt, one language and
    one phrasing that embeds both, and pairs the resulting query with the
    entry's code template for that language. Because every pick is an
    independent draw, repeated rows are expected.

    Calling the function with no arguments behaves as before: it uses the
    module-level ``OUTPUT_FILE``, ``SAMPLES_PER_ALGO`` and ``ALGORITHMS`` and
    the shared ``random`` module state.

    Args:
        output_file: Path of the CSV to write; an existing file is
            overwritten. Defaults to ``OUTPUT_FILE``.
        samples_per_algo: Number of rows to generate per template entry.
            Defaults to ``SAMPLES_PER_ALGO``.
        seed: When given, rows are drawn from a private
            ``random.Random(seed)`` so the output is reproducible and the
            global ``random`` state is left untouched. When ``None``, the
            module-level ``random`` functions are used.
        algorithms: Mapping of name -> dict holding a ``"prompts"`` list and
            one code string under each of ``"python"``, ``"cpp"`` and
            ``"java"``. Defaults to ``ALGORITHMS``.

    Returns:
        The generated rows as a list of ``[prompt, language, code]`` lists,
        in the order they were written (header row excluded).

    Raises:
        KeyError: If a template entry lacks ``"prompts"`` or the code string
            for the language that was drawn.
        OSError: If ``output_file`` cannot be opened for writing.
    """
    # Resolve defaults at call time so the module constants stay the single
    # source of truth for argument-less calls.
    if output_file is None:
        output_file = OUTPUT_FILE
    if samples_per_algo is None:
        samples_per_algo = SAMPLES_PER_ALGO
    if algorithms is None:
        algorithms = ALGORITHMS

    # Both the ``random`` module and a ``random.Random`` instance expose
    # ``choice``; using the module when no seed is given keeps any external
    # ``random.seed(...)`` call effective, exactly as before.
    rng = random if seed is None else random.Random(seed)

    languages = ("python", "cpp", "java")

    print("Generating THE ULTIMATE Rosetta Stone Dataset...")
    data = []

    for templates in algorithms.values():
        base_prompts = templates["prompts"]

        for _ in range(samples_per_algo):
            # 1. Randomize prompt and target language
            prompt_base = rng.choice(base_prompts)
            lang = rng.choice(languages)

            # 2. Wrap them in one of several natural-language phrasings
            variations = (
                f"{prompt_base} in {lang}",
                f"write {lang} code for {prompt_base}",
                f"how to {prompt_base} using {lang}",
                f"program for {prompt_base} in {lang}",
                f"give me {prompt_base} code {lang}",
            )
            query = rng.choice(variations)

            # 3. Pair the query with the template for the chosen language
            data.append([query, lang, templates[lang]])

    # newline='' lets the csv module control line endings, which matters
    # because the code cells contain embedded newlines.
    with open(output_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(["prompt", "language", "code"])
        writer.writerows(data)

    print(f"✅ Created {len(data)} training samples covering {len(algorithms)} major topics.")
    print(f"Saved to {output_file}")
    return data

# Script entry point: build the dataset only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    generate_dataset()