udiboy1209 committed on Jul 4, 2025

Commit

e78b7eb

1 Parent(s): f0e5581

Add real world dataset

Browse files

Files changed (18) hide show

.gitignore +3 -0
real_world_dataset/collect_dataset.py +79 -0
real_world_dataset/custom_source/arduino_sensorkit.c +45 -0
real_world_dataset/custom_source/ardupilot.c +93 -0
real_world_dataset/custom_source/cleanflight.c +78 -0
real_world_dataset/custom_source/linux_color_cvt.c +76 -0
real_world_dataset/custom_source/nn_funcs.f90 +135 -0
real_world_dataset/custom_source/ntc_thermistor.c +92 -0
real_world_dataset/custom_source/paparazzi.c +186 -0
real_world_dataset/custom_source/pathtracing.c +46 -0
real_world_dataset/dataset_info.json +236 -0
real_world_dataset/eval_dataset.py +100 -0
real_world_dataset/generate.sh +21 -0
real_world_dataset/make.sh +38 -0
real_world_dataset/preprocess.sh +21 -0
real_world_dataset/related_evals/run_btc.sh +3 -0
real_world_dataset/related_evals/run_nova.py +57 -0
real_world_dataset/related_evals/run_slade.py +149 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,3 @@

+__pycache__/
+dataset/
+tokenized/

real_world_dataset/collect_dataset.py ADDED Viewed

	@@ -0,0 +1,79 @@

+from os import makedirs
+from os.path import join as pjoin
+import json
+import logging
+from tqdm import tqdm
+import numpy as np
+from remend.tools.disassemble import DisassemblerX64, DisassemblerARM32, DisassemblerAArch64
+# Don't show warnings
+logging.getLogger("cle").setLevel(logging.ERROR)
+BUILD="build"
+DATASET="dataset"
+#     # "linux_color_cvt": [
+#         # {"name": "transfer_rgb_to_oprgb", "eqn": "pow(x, 1/2.19921875)"},
+#         # {"name": "transfer_rgb_to_dcip3", "eqn": "pow(x, 1/2.6)"},
+#         # {"name": "transfer_rgb_to_smpte2084", "eqn": "pow(x, 1/2.6)"},
+#     # ]
+with open("dataset_info.json", "r") as f:
+    BINS = json.load(f)
+DTYPES = ["f", "d"]
+OPTS = [0, 1, 2] # 3
+ARCH = [
+    ("arm32", DisassemblerARM32),
+    ("aarch64", DisassemblerAArch64),
+    ("x64", DisassemblerX64),
+]
def match_constants(asm_c, eqn_c):
    """Pair equation constants with the disassembled constants they equal.

    Args:
        asm_c: mapping of assembly-constant key -> value; each value is
            converted with ``float()``.
        eqn_c: mapping of equation-constant name -> arithmetic expression
            string (e.g. ``"pi/180"``); ``pi``, ``tan`` and ``sqrt`` are
            available to the expression.

    Returns:
        dict mapping each matched equation-constant name to ``str()`` of the
        assembly-constant key whose value lies within an absolute tolerance
        of 1e-3. When several assembly constants match the same name, the
        one iterated last wins. Unmatched names are absent from the result.
    """
    from math import pi, tan, sqrt

    # NOTE(security): eval() executes arbitrary Python. The expressions come
    # from dataset_info.json; only run this on a trusted copy of that file.
    # The namespace is passed explicitly so the expressions do not depend on
    # whatever names happen to be visible in the calling frame.
    namespace = {"pi": pi, "tan": tan, "sqrt": sqrt}

    # Evaluate every expression once instead of once per assembly constant.
    eqn_values = {}
    for name, expr in eqn_c.items():
        eqn_values[name] = eval(expr, namespace)

    cmap = {}
    for asm_key, asm_val in asm_c.items():
        asm_val = float(asm_val)
        for name, value in eqn_values.items():
            if abs(asm_val - value) < 1e-3:
                cmap[name] = str(asm_key)
    return cmap
makedirs(DATASET, exist_ok=True)
for arch, Dclass in ARCH:
    # The three per-architecture outputs are line-aligned: line i of the
    # .asm, .eqn and .info files all describe the same sample. The `with`
    # statement guarantees all three are flushed and closed, including the
    # .eqn file and including the case where disassembly raises part-way.
    with open(pjoin(DATASET, arch + ".asm"), "w") as asmf, \
            open(pjoin(DATASET, arch + ".eqn"), "w") as eqnf, \
            open(pjoin(DATASET, arch + ".info"), "w") as infof:
        print("Running:", arch)
        # One sample per (binary, function, optimisation level, data type).
        samples = [(binary, func, data, opt, dtype)
                   for binary in BINS
                   for func, data in BINS[binary].items()
                   for opt in OPTS
                   for dtype in DTYPES]
        for binary, func, data, opt, dtype in tqdm(samples):
            binpath = pjoin(BUILD, f"{binary}_{arch}_O{opt}.elf")
            fname = f"{func}_{dtype}"
            if binary == "nn_funcs":
                # nn_funcs is the Fortran source; presumably the compiler
                # appends an underscore to its symbols -- TODO confirm.
                fname += "_"
            D = Dclass(binpath)
            diss = D.disassemble(fname)
            info = {"constants": D.constants, "bin": binpath, "func": fname,
                    "eqn": data["eqn"], "eqn_constants": data["constants"]}
            prefix = data["prefix"]
            cmap = match_constants(D.constants, data["constants"])
            if len(cmap) != len(data["constants"]):
                # Not every equation constant was found in the assembly:
                # keep the prefix untouched and record the partial mapping
                # in the info line for later inspection.
                info["cmap"] = cmap
            else:
                # Rename via a temporary "<PH>" marker so a freshly written
                # name is never picked up by a later replacement.
                for ec, ac in cmap.items():
                    prefix = prefix.replace(ec, "<PH>" + ac)
                prefix = prefix.replace("<PH>", "k")
            asmf.write(diss + "\n")
            eqnf.write(prefix + "\n")
            json.dump(info, infof)
            infof.write("\n")

real_world_dataset/custom_source/arduino_sensorkit.c ADDED Viewed

	@@ -0,0 +1,45 @@

+#include <math.h>
+#include <stdio.h>
+float calcAltitude_f(float pressure)
+{
+  float A = pressure/101325.0f;
+  float B = 1.0f/5.25588f;
+  float C = powf(A,B);
+  C = 1.0f - C;
+  C = C /0.0000225577f;
+  return C;
+}
+float convertCtoF_f(float c) {
+    return c * 9.0f / 5.0f + 32.0f;
+}
+double calcAltitude_d(double pressure)
+{
+  double A = pressure/101325.0;
+  double B = 1.0/5.25588;
+  double C = pow(A,B);
+  C = 1.0 - C;
+  C = C /0.0000225577;
+  return C;
+}
/* Convert a temperature from degrees Celsius to degrees Fahrenheit
 * (double-precision variant). Uses double literals so that no
 * single-precision constants appear in the double code path, matching the
 * other *_d functions in this file. */
double convertCtoF_d(double c) {
    return c * 9.0 / 5.0 + 32.0;
}
+int main() {
+    float xf;
+    double xd;
+    int l;
+    l = scanf("%f", &xf);
+    l = scanf("%lf", &xd);
+    printf("%f\n", calcAltitude_f(xf));
+    printf("%lf\n", calcAltitude_d(xd));
+    printf("%f\n", convertCtoF_f(xf));
+    printf("%lf\n", convertCtoF_d(xd));
+}

real_world_dataset/custom_source/ardupilot.c ADDED Viewed

	@@ -0,0 +1,93 @@

+#include <math.h>
+#include <stdio.h>
+const double PI = 3.14159265359;
+const float PI_F = 3.14159265359f;
+#define DEG_TO_RAD_F      (PI_F / 180.0f)
+#define DEG_TO_RAD_D      (PI / 180.0)
+#define RAD_TO_DEG_F      (1.0f / DEG_TO_RAD_F)
+#define RAD_TO_DEG_D      (1.0 / DEG_TO_RAD_D)
+#define GRAVITY_MSS_F     9.80665f
+#define GRAVITY_MSS_D     9.80665
+float degF_to_Kelvin_f(float temp_f) { return (temp_f + 459.67f) * 0.55556F; }
+double degF_to_Kelvin_d(double temp_f) { return (temp_f + 459.67) * 0.55556; }
+float radians_f(float deg) { return deg * DEG_TO_RAD_F; }
+float degrees_f(float rad) { return rad * RAD_TO_DEG_F; }
+double radians_d(double deg) { return deg * DEG_TO_RAD_D; }
+double degrees_d(double rad) { return rad * RAD_TO_DEG_D; }
+float sq_f(const float v) { return v*v; }
+double sq_d(const double v) { return v*v; }
+double w_d(const double dHertz) { return dHertz * 2.0 * PI; }
+float w_f(const float dHertz) { return dHertz * 2.0f * PI_F; }
+float angle_to_accel_f(float angle_deg) { return GRAVITY_MSS_F * tanf(angle_deg*DEG_TO_RAD_F); }
+float accel_to_angle_f(float accel) { return atanf(accel/GRAVITY_MSS_F)*RAD_TO_DEG_F; }
+double angle_to_accel_d(double angle_deg) { return GRAVITY_MSS_D * tan(angle_deg*DEG_TO_RAD_D); }
+double accel_to_angle_d(double accel) { return atan(accel/GRAVITY_MSS_D)*RAD_TO_DEG_D; }
+#define SQRT_2_3_F 0.816496580927726f
+#define SQRT_6_F   2.449489742783178f
+#define SQRT_2_3_D 0.816496580927726
+#define SQRT_6_D   2.449489742783178
+static const float TAU_FACTOR_F = SQRT_6_F / 24.0f;
+static const double TAU_FACTOR_D = SQRT_6_D / 24.0;
+// Helper function used for Quinn's frequency estimation
+float tau_f(const float x)
+{
+    float p1 = logf(3.0f * x*x + 6.0f * x + 1.0f);
+    float part1 = x + 1.0f - SQRT_2_3_F;
+    float part2 = x + 1.0f + SQRT_2_3_F;
+    float p2 = logf(part1 / part2);
+    return (0.25f * p1 - TAU_FACTOR_F * p2);
+}
+// Helper function used for Quinn's frequency estimation
+double tau_d(const double x)
+{
+    double p1 = log(3.0 * x*x + 6.0 * x + 1.0);
+    double part1 = x + 1.0 - SQRT_2_3_D;
+    double part2 = x + 1.0 + SQRT_2_3_D;
+    double p2 = log(part1 / part2);
+    return (0.25 * p1 - TAU_FACTOR_D * p2);
+}
+int main() {
+    float xf;
+    double xd;
+    int l;
+    l = scanf("%f", &xf);
+    l = scanf("%lf", &xd);
+    printf("%f\n", degF_to_Kelvin_f(xf));
+    printf("%lf\n", degF_to_Kelvin_d(xd));
+    printf("%f\n", radians_f(xf));
+    printf("%lf\n", radians_d(xd));
+    printf("%f\n", degrees_f(xf));
+    printf("%lf\n", degrees_d(xd));
+    printf("%f\n", w_f(xf));
+    printf("%lf\n", w_d(xd));
+    printf("%f\n", angle_to_accel_f(xf));
+    printf("%lf\n", angle_to_accel_d(xd));
+    printf("%f\n", accel_to_angle_f(xf));
+    printf("%lf\n", accel_to_angle_d(xd));
+    printf("%f\n", tau_f(xf));
+    printf("%lf\n", tau_d(xd));
+    printf("%f\n", sq_f(xf));
+    printf("%lf\n", sq_d(xd));
+}

real_world_dataset/custom_source/cleanflight.c ADDED Viewed

	@@ -0,0 +1,78 @@

+#include <math.h>
+#include <stdio.h>
+#define M_PIf       3.14159265358979323846f
+#define sinPolyCoef3_f -1.666665710e-1f                                          // Double: -1.666665709650470145824129400050267289858e-1
+#define sinPolyCoef5_f  8.333017292e-3f                                          // Double:  8.333017291562218127986291618761571373087e-3
+#define sinPolyCoef7_f -1.980661520e-4f                                          // Double: -1.980661520135080504411629636078917643846e-4
+#define sinPolyCoef9_f  2.600054768e-6f                                          // Double:  2.600054767890361277123254766503271638682e-6
+#define sinPolyCoef3_d -1.666665709650470145824129400050267289858e-1
+#define sinPolyCoef5_d  8.333017291562218127986291618761571373087e-3
+#define sinPolyCoef7_d -1.980661520135080504411629636078917643846e-4
+#define sinPolyCoef9_d  2.600054767890361277123254766503271638682e-6
+float invSqrt_f(float x) { return 1.0f / sqrtf(x); }
+double invSqrt_d(double x) { return 1.0 / sqrt(x); }
+float pressureToAltitude_f(const float pressure) { return (1.0f - powf(pressure / 101325.0f, 0.190295f)) * 4433000.0f; }
+double pressureToAltitude_d(const double pressure) { return (1.0 - pow(pressure / 101325.0, 0.190295)) * 4433000.0; }
+float dynThrottle_f(float throttle) { return throttle * (1.0f - (throttle * throttle) / 3.0f) * 1.5f; }
+double dynThrottle_d(double throttle) { return throttle * (1.0 - (throttle * throttle) / 3.0) * 1.5; }
+float calculateAccZLowPassFilterRCTimeConstant_f(float accz_lpf_cutoff) { return 0.5f / (M_PIf * accz_lpf_cutoff); }
+double calculateAccZLowPassFilterRCTimeConstant_d(double accz_lpf_cutoff) { return 0.5 / (M_PI * accz_lpf_cutoff); }
+float calculateThrottleAngleScale_f(float throttle_correction_angle) { return (1800.0f / M_PIf) * (900.0f / throttle_correction_angle); }
+double calculateThrottleAngleScale_d(double throttle_correction_angle) { return (1800.0 / M_PI) * (900.0 / throttle_correction_angle); }
+float sin_approx_f(float x)
+{
+    float x2 = x * x;
+    return x + x * x2 * (sinPolyCoef3_f + x2 * (sinPolyCoef5_f + x2 * (sinPolyCoef7_f + x2 * sinPolyCoef9_f)));
+}
+double sin_approx_d(double x)
+{
+    double x2 = x * x;
+    return x + x * x2 * (sinPolyCoef3_d + x2 * (sinPolyCoef5_d + x2 * (sinPolyCoef7_d + x2 * sinPolyCoef9_d)));
+}
+float acos_approx_f(float xa)
+{
+    return sqrtf(1.0f - xa) * (1.5707288f + xa * (-0.2121144f + xa * (0.0742610f + (-0.0187293f * xa))));
+}
+double acos_approx_d(double xa)
+{
+    return sqrt(1.0 - xa) * (1.5707288 + xa * (-0.2121144 + xa * (0.0742610 + (-0.0187293 * xa))));
+}
+int main() {
+    float xf;
+    double xd;
+    int l;
+    l = scanf("%f", &xf);
+    l = scanf("%lf", &xd);
+    printf("%f\n", invSqrt_f(xf));
+    printf("%lf\n", invSqrt_d(xd));
+    printf("%f\n", pressureToAltitude_f(xf));
+    printf("%lf\n", pressureToAltitude_d(xd));
+    printf("%f\n", dynThrottle_f(xf));
+    printf("%lf\n", dynThrottle_d(xd));
+    printf("%f\n", calculateAccZLowPassFilterRCTimeConstant_f(xf));
+    printf("%lf\n", calculateAccZLowPassFilterRCTimeConstant_d(xd));
+    printf("%f\n", calculateThrottleAngleScale_f(xf));
+    printf("%lf\n", calculateThrottleAngleScale_d(xd));
+    printf("%f\n", sin_approx_f(xf));
+    printf("%lf\n", sin_approx_d(xd));
+    printf("%f\n", acos_approx_f(xf));
+    printf("%lf\n", acos_approx_d(xd));
+}

real_world_dataset/custom_source/linux_color_cvt.c ADDED Viewed

	@@ -0,0 +1,76 @@

+#include <math.h>
+#include <stdio.h>
+static double transfer_rgb_to_oprgb_d(double v)
+{
+	return pow(v, 1.0 / 2.19921875);
+}
+static double transfer_rgb_to_dcip3_d(double v)
+{
+	return pow(v, 1.0 / 2.6);
+}
+static double transfer_rgb_to_smpte2084_d(double v)
+{
+	const double m1 = (2610.0 / 4096.0) / 4.0;
+	const double m2 = 128.0 * 2523.0 / 4096.0;
+	const double c1 = 3424.0 / 4096.0;
+	const double c2 = 32.0 * 2413.0 / 4096.0;
+	const double c3 = 32.0 * 2392.0 / 4096.0;
+	/*
+	 * The RGB input maps to the luminance range 0-100 cd/m^2, while
+	 * SMPTE-2084 maps values to the luminance range of 0-10000 cd/m^2.
+	 * Hence the factor 100.
+	 */
+	v /= 100.0;
+	v = pow(v, m1);
+	return pow((c1 + c2 * v) / (1 + c3 * v), m2);
+}
+static float transfer_rgb_to_oprgb_f(float v)
+{
+	return pow(v, 1.0f / 2.19921875f);
+}
+static float transfer_rgb_to_dcip3_f(float v)
+{
+	return pow(v, 1.0f / 2.6f);
+}
+static float transfer_rgb_to_smpte2084_f(float v)
+{
+	const float m1 = (2610.0f / 4096.0f) / 4.0f;
+	const float m2 = 128.0f * 2523.0f / 4096.0f;
+	const float c1 = 3424.0f / 4096.0f;
+	const float c2 = 32.0f * 2413.0f / 4096.0f;
+	const float c3 = 32.0f * 2392.0f / 4096.0f;
+	/*
+	 * The RGB input maps to the luminance range 0-100 cd/m^2, while
+	 * SMPTE-2084 maps values to the luminance range of 0-10000 cd/m^2.
+	 * Hence the factor 100.
+	 */
+	v /= 100.0f;
+	v = pow(v, m1);
+	return pow((c1 + c2 * v) / (1 + c3 * v), m2);
+}
/* Driver: reads one float and one double from stdin, then prints the result
 * of every transfer function, alternating the float (_f) and double (_d)
 * variant of each. */
int main() {
    float xf;
    double xd;
    int l; /* receives scanf's return value; not otherwise used */
    l = scanf("%f", &xf);
    l = scanf("%lf", &xd);
    printf("%f\n", transfer_rgb_to_oprgb_f(xf));
    printf("%lf\n", transfer_rgb_to_oprgb_d(xd));
    printf("%f\n", transfer_rgb_to_dcip3_f(xf));
    /* Call the _d variant here: passing xd to the _f variant would narrow
     * the input to float and leave transfer_rgb_to_dcip3_d uncalled. */
    printf("%lf\n", transfer_rgb_to_dcip3_d(xd));
    printf("%f\n", transfer_rgb_to_smpte2084_f(xf));
    printf("%lf\n", transfer_rgb_to_smpte2084_d(xd));
}

real_world_dataset/custom_source/nn_funcs.f90 ADDED Viewed

	@@ -0,0 +1,135 @@

+function logistic_f(x) result(y)
+    implicit none
+    real, intent(in) :: x
+    real :: y
+    y = 1/(1+exp(-x))
+end function
+function logistic_d(x) result(y)
+    implicit none
+    double precision, intent(in) :: x
+    double precision :: y
+    y = 1/(1+exp(-x))
+end function
+function rbf_f(x) result(y)
+    implicit none
+    real, intent(in) :: x
+    real :: y, sigmasq, mean
+    parameter (sigmasq = 1.9983)
+    parameter (mean = 0.7328)
+    y = exp(-(x-mean)**2/sigmasq)
+end function
+function rbf_d(x) result(y)
+    implicit none
+    double precision, intent(in) :: x
+    double precision :: y, sigmasq, mean
+    parameter (sigmasq = 1.9983d0)
+    parameter (mean = 0.7328d0)
+    y = exp(-(x-mean)**2/sigmasq)
+end function
+function multiquad_f(x) result(y)
+    implicit none
+    real, intent(in) :: x
+    real :: y, biassq, mean
+    parameter (biassq = 8.66172)
+    parameter (mean = 0.881)
+    y = sqrt((x-mean)**2 + biassq)
+end function
+function multiquad_d(x) result(y)
+    implicit none
+    double precision, intent(in) :: x
+    double precision :: y, biassq, mean
+    parameter (biassq = 8.66172d0)
+    parameter (mean = 0.881d0)
+    y = sqrt((x-mean)**2 + biassq)
+end function
+function invmultiquad_f(x) result(y)
+    implicit none
+    real, intent(in) :: x
+    real:: y, biassq, mean
+    parameter (biassq = 8.66172)
+    parameter (mean = 0.881)
+    y = 1/sqrt((x-mean)**2 + biassq)
+end function
+function invmultiquad_d(x) result(y)
+    implicit none
+    double precision, intent(in) :: x
+    double precision:: y, biassq, mean
+    parameter (biassq = 8.66172d0)
+    parameter (mean = 0.881d0)
+    y = 1/sqrt((x-mean)**2 + biassq)
+end function
+function tanh_f(x) result(y)
+    implicit none
+    real, intent(in) :: x
+    real:: y
+    y = (exp(x)-exp(-x))/(exp(x)+exp(-x))
+end function
+function tanh_d(x) result(y)
+    implicit none
+    double precision, intent(in) :: x
+    double precision:: y
+    y = (exp(x)-exp(-x))/(exp(x)+exp(-x))
+end function
+function softplus_f(x) result(y)
+    implicit none
+    real, intent(in) :: x
+    real:: y
+    y = log(1 + exp(x))
+end function
+function softplus_d(x) result(y)
+    implicit none
+    double precision, intent(in) :: x
+    double precision:: y
+    y = log(1 + exp(x))
+end function
+function silu_f(x) result(y)
+    implicit none
+    real, intent(in) :: x
+    real:: y
+    y = x/(1 + exp(-x))
+end function
+function silu_d(x) result(y)
+    implicit none
+    double precision, intent(in) :: x
+    double precision:: y
+    y = x/(1 + exp(-x))
+end function
+program main
+    implicit none
+    real :: xf
+    double precision :: xd
+    real :: logistic_f, rbf_f, multiquad_f, invmultiquad_f, tanh_f, softplus_f, silu_f
+    double precision :: logistic_d, rbf_d, multiquad_d, invmultiquad_d, tanh_d, softplus_d, silu_d
+    read(*, *) xf
+    read(*, *) xd
+    print *, ":", logistic_f(xf)
+    print *, ":", logistic_d(xd)
+    print *, ":", rbf_f(xf)
+    print *, ":", rbf_d(xd)
+    print *, ":", multiquad_f(xf)
+    print *, ":", multiquad_d(xd)
+    print *, ":", invmultiquad_f(xf)
+    print *, ":", invmultiquad_d(xd)
+    print *, ":", tanh_f(xf)
+    print *, ":", tanh_d(xd)
+    print *, ":", softplus_f(xf)
+    print *, ":", softplus_d(xd)
+    print *, ":", silu_f(xf)
+    print *, ":", silu_d(xd)
+end program main

real_world_dataset/custom_source/ntc_thermistor.c ADDED Viewed

	@@ -0,0 +1,92 @@

+#define REFERENCE_RESISTANCE_F   8000.0f
+#define NOMINAL_RESISTANCE_F     100000.0f
+#define NOMINAL_TEMPERATURE_F    25.0f
+#define B_VALUE_F                3950.0f
+#define REFERENCE_RESISTANCE_D   8000.0
+#define NOMINAL_RESISTANCE_D     100000.0
+#define NOMINAL_TEMPERATURE_D    25.0
+#define B_VALUE_D                3950.0
+#include <math.h>
+#include <stdio.h>
+double resistanceToKelvins_d(double resistance) {
+	double inverseKelvin = 1.0 / NOMINAL_TEMPERATURE_D +
+		log(resistance / NOMINAL_RESISTANCE_D) / B_VALUE_D;
+	return (1.0 / inverseKelvin);
+}
+double readResistance_d(double voltage) {
+	return REFERENCE_RESISTANCE_D / (1024.0 / voltage - 1.0);
+}
+double celsiusToKelvins_d(double celsius) {
+	return (celsius + 273.15);
+}
+double kelvinsToCelsius_d(double kelvins) {
+	return (kelvins - 273.15);
+}
+double celsiusToFahrenheit_d(double celsius) {
+	return (celsius * 1.8 + 32.0);
+}
+double kelvinsToFahrenheit_d(double kelvins) {
+	return (kelvins - 273.15) * 1.8 + 32.0;
+}
+float resistanceToKelvins_f(float resistance) {
+	float inverseKelvin = 1.0f / NOMINAL_TEMPERATURE_F +
+		logf(resistance / NOMINAL_RESISTANCE_F) / B_VALUE_F;
+	return (1.0f / inverseKelvin);
+}
+float readResistance_f(float voltage) {
+	return REFERENCE_RESISTANCE_F / (1024.0f / voltage - 1.0f);
+}
+float celsiusToKelvins_f(float celsius) {
+	return (celsius + 273.15f);
+}
+float kelvinsToCelsius_f(float kelvins) {
+	return (kelvins - 273.15f);
+}
+float celsiusToFahrenheit_f(float celsius) {
+	return (celsius * 1.8f + 32.0f);
+}
+float kelvinsToFahrenheit_f(float kelvins) {
+	return (kelvins - 273.15f) * 1.8f + 32.0f;
+}
+int main() {
+    float xf;
+    double xd;
+    int l;
+    l = scanf("%f", &xf);
+    l = scanf("%lf", &xd);
+    printf("%f\n", resistanceToKelvins_f(xf));
+    printf("%lf\n", resistanceToKelvins_d(xd));
+    printf("%f\n", celsiusToKelvins_f(xf));
+    printf("%lf\n", celsiusToKelvins_d(xd));
+    printf("%f\n", readResistance_f(xf));
+    printf("%lf\n", readResistance_d(xd));
+    printf("%f\n", kelvinsToCelsius_f(xf));
+    printf("%lf\n", kelvinsToCelsius_d(xd));
+    printf("%f\n", kelvinsToFahrenheit_f(xf));
+    printf("%lf\n", kelvinsToFahrenheit_d(xd));
+    printf("%f\n", celsiusToFahrenheit_f(xf));
+    printf("%lf\n", celsiusToFahrenheit_d(xd));
+}

real_world_dataset/custom_source/paparazzi.c ADDED Viewed

	@@ -0,0 +1,186 @@

+#include <math.h>
+#include <stdio.h>
+#define PI_D 3.14159265359
+#define PI_F 3.14159265359f
+float isometric_latitude0_f(float phi) {
+  return logf (tanf (PI_F/4.0f + phi / 2.0f));
+}
+double isometric_latitude0_d(double phi) {
+  return log (tan (PI_D/4.0 + phi / 2.0));
+}
+// Standard Atmosphere constants
+/** ISA sea level standard atmospheric pressure in Pascal */
+#define PPRZ_ISA_SEA_LEVEL_PRESSURE_F 101325.0f
+#define PPRZ_ISA_SEA_LEVEL_PRESSURE_D 101325.0
+/** ISA sea level standard temperature in Kelvin */
+#define PPRZ_ISA_SEA_LEVEL_TEMP_F 288.15f
+#define PPRZ_ISA_SEA_LEVEL_TEMP_D 288.15
+/** temperature lapse rate in K/m */
+#define PPRZ_ISA_TEMP_LAPS_RATE_F 0.0065f
+#define PPRZ_ISA_TEMP_LAPS_RATE_D 0.0065
+/** earth-surface gravitational acceleration in m/s^2 */
+#define PPRZ_ISA_GRAVITY_F 9.80665f
+#define PPRZ_ISA_GRAVITY_D 9.80665
+/** universal gas constant in J/(mol*K) */
+#define PPRZ_ISA_GAS_CONSTANT_F 8.31447f
+#define PPRZ_ISA_GAS_CONSTANT_D 8.31447
+/** molar mass of dry air in kg/mol */
+#define PPRZ_ISA_MOLAR_MASS_F 0.0289644f
+#define PPRZ_ISA_MOLAR_MASS_D 0.0289644
+/** universal gas constant / molar mass of dry air in J/(kg*K) */
+#define PPRZ_ISA_AIR_GAS_CONSTANT_F (PPRZ_ISA_GAS_CONSTANT_F/PPRZ_ISA_MOLAR_MASS_F)
+#define PPRZ_ISA_AIR_GAS_CONSTANT_D (PPRZ_ISA_GAS_CONSTANT_D/PPRZ_ISA_MOLAR_MASS_D)
+/** standard air density in kg/m^3 */
+#define PPRZ_ISA_AIR_DENSITY_F 1.225f
+#define PPRZ_ISA_AIR_DENSITY_D 1.225
+static const float PPRZ_ISA_M_OF_P_CONST_F = (PPRZ_ISA_AIR_GAS_CONSTANT_F * PPRZ_ISA_SEA_LEVEL_TEMP_F / PPRZ_ISA_GRAVITY_F);
+static const double PPRZ_ISA_M_OF_P_CONST_D = (PPRZ_ISA_AIR_GAS_CONSTANT_D * PPRZ_ISA_SEA_LEVEL_TEMP_D / PPRZ_ISA_GRAVITY_D);
+float pprz_isa_pressure_of_altitude_f(float altitude)
+{
+  return (PPRZ_ISA_SEA_LEVEL_PRESSURE_F * expf((-1.0f / PPRZ_ISA_M_OF_P_CONST_F) * altitude));
+}
+double pprz_isa_pressure_of_altitude_d(double altitude)
+{
+  return (PPRZ_ISA_SEA_LEVEL_PRESSURE_D * exp((-1. / PPRZ_ISA_M_OF_P_CONST_D) * altitude));
+}
+float pprz_isa_altitude_of_pressure_f(float pressure)
+{
+  // if (pressure > 0.) {
+    return (PPRZ_ISA_M_OF_P_CONST_F * logf(PPRZ_ISA_SEA_LEVEL_PRESSURE_F / pressure));
+  // } else {
+    // return 0.;
+  // }
+}
+double pprz_isa_altitude_of_pressure_d(double pressure)
+{
+  // if (pressure > 0.) {
+    return (PPRZ_ISA_M_OF_P_CONST_D * log(PPRZ_ISA_SEA_LEVEL_PRESSURE_D / pressure));
+  // } else {
+    // return 0.;
+  // }
+}
+float pprz_isa_pressure_of_height_f(float height)
+{
+    float ref_p = 0.8f * PPRZ_ISA_SEA_LEVEL_PRESSURE_F;
+  return (ref_p * expf((-1.0f / PPRZ_ISA_M_OF_P_CONST_F) * height));
+}
+double pprz_isa_pressure_of_height_d(double height)
+{
+    double ref_p = 0.8 * PPRZ_ISA_SEA_LEVEL_PRESSURE_D;
+  return (ref_p * exp((-1.0 / PPRZ_ISA_M_OF_P_CONST_D) * height));
+}
+float pprz_isa_height_of_pressure_full_f(float pressure)
+{
+    const float ref_p = 0.8f * PPRZ_ISA_SEA_LEVEL_PRESSURE_F;
+    const float prel = pressure / ref_p;
+    const float inv_expo = PPRZ_ISA_GAS_CONSTANT_F * PPRZ_ISA_TEMP_LAPS_RATE_F /
+                           PPRZ_ISA_GRAVITY_F / PPRZ_ISA_MOLAR_MASS_F;
+    return (1.0f - powf(prel, inv_expo)) * PPRZ_ISA_SEA_LEVEL_TEMP_F / PPRZ_ISA_TEMP_LAPS_RATE_F;
+}
+double pprz_isa_height_of_pressure_full_d(double pressure)
+{
+    const double ref_p = 0.8 * PPRZ_ISA_SEA_LEVEL_PRESSURE_D;
+    const double prel = pressure / ref_p;
+    const double inv_expo = PPRZ_ISA_GAS_CONSTANT_D * PPRZ_ISA_TEMP_LAPS_RATE_D /
+                           PPRZ_ISA_GRAVITY_D / PPRZ_ISA_MOLAR_MASS_D;
+    return (1.0 - pow(prel, inv_expo)) * PPRZ_ISA_SEA_LEVEL_TEMP_D / PPRZ_ISA_TEMP_LAPS_RATE_D;
+}
+float pprz_isa_height_of_pressure_f(float pressure)
+{
+    const float ref_p = 0.8f * PPRZ_ISA_SEA_LEVEL_PRESSURE_F;
+    return (PPRZ_ISA_M_OF_P_CONST_F * logf(ref_p / pressure));
+}
+double pprz_isa_height_of_pressure_d(double pressure)
+{
+    const double ref_p = 0.8 * PPRZ_ISA_SEA_LEVEL_PRESSURE_D;
+    return (PPRZ_ISA_M_OF_P_CONST_D * log(ref_p / pressure));
+}
+float pprz_isa_temperature_of_altitude_f(float alt)
+{
+  return PPRZ_ISA_SEA_LEVEL_TEMP_F - PPRZ_ISA_TEMP_LAPS_RATE_F * alt;
+}
+double pprz_isa_temperature_of_altitude_d(double alt)
+{
+  return PPRZ_ISA_SEA_LEVEL_TEMP_D - PPRZ_ISA_TEMP_LAPS_RATE_D * alt;
+}
+float eas_from_dynamic_pressure_f(float q)
+{
+  const float two_div_rho_0 = 2.0f / PPRZ_ISA_AIR_DENSITY_F;
+  return sqrtf(q * two_div_rho_0);
+}
+double eas_from_dynamic_pressure_d(double q)
+{
+  const double two_div_rho_0 = 2.0 / PPRZ_ISA_AIR_DENSITY_D;
+  return sqrt(q * two_div_rho_0);
+}
+float change_rep_f(float dir)
+{
+  return PI_F/2.0f - dir;
+}
+double change_rep_d(double dir)
+{
+  return PI_D/2.0 - dir;
+}
+#define NMEA_PI180_F                  (PI_F / 180.0f)
+#define NMEA_PI180_D                  (PI_D / 180.0)
+float nmea_degree2radian_f(float val) { return (val * NMEA_PI180_F); }
+double nmea_degree2radian_d(double val) { return (val * NMEA_PI180_D); }
+float nmea_radian2degree_f(float val) { return (val / NMEA_PI180_F); }
+double nmea_radian2degree_d(double val) { return (val / NMEA_PI180_D); }
+int main() {
+    float xf;
+    double xd;
+    int l;
+    l = scanf("%f", &xf);
+    l = scanf("%lf", &xd);
+    printf("%f\n", isometric_latitude0_f(xf));
+    printf("%lf\n", isometric_latitude0_d(xd));
+    printf("%f\n", pprz_isa_pressure_of_altitude_f(xf));
+    printf("%lf\n", pprz_isa_pressure_of_altitude_d(xd));
+    printf("%f\n", pprz_isa_altitude_of_pressure_f(xf));
+    printf("%lf\n", pprz_isa_altitude_of_pressure_d(xd));
+    printf("%f\n", pprz_isa_pressure_of_height_f(xf));
+    printf("%lf\n", pprz_isa_pressure_of_height_d(xd));
+    printf("%f\n", pprz_isa_height_of_pressure_full_f(xf));
+    printf("%lf\n", pprz_isa_height_of_pressure_full_d(xd));
+    printf("%f\n", pprz_isa_height_of_pressure_f(xf));
+    printf("%lf\n", pprz_isa_height_of_pressure_d(xd));
+    printf("%f\n", pprz_isa_temperature_of_altitude_f(xf));
+    printf("%lf\n", pprz_isa_temperature_of_altitude_d(xd));
+    printf("%f\n", eas_from_dynamic_pressure_f(xf));
+    printf("%lf\n", eas_from_dynamic_pressure_d(xd));
+    printf("%f\n", nmea_degree2radian_f(xf));
+    printf("%lf\n", nmea_degree2radian_d(xd));
+    printf("%f\n", nmea_radian2degree_f(xf));
+    printf("%lf\n", nmea_radian2degree_d(xd));
+}

real_world_dataset/custom_source/pathtracing.c ADDED Viewed

	@@ -0,0 +1,46 @@

+#include <math.h>
+#include <stdio.h>
+float fresnelReflectanceAtNormal_f(float index) {
+  float partial = (1.0f - index) / (1.0f + index);
+  return partial * partial;
+}
+double fresnelReflectanceAtNormal_d(double index) {
+  double partial = (1.0 - index) / (1.0 + index);
+  return partial * partial;
+}
+float blinToBeckmann_f(float alpha) {
+  return sqrtf(2.0f / (alpha + 2.0f));
+}
+float beckmannToBlinn_f(float slope) {
+  return 2.0f / (slope * slope) - 2.0f;
+}
+double blinToBeckmann_d(double alpha) {
+  return sqrtf(2.0 / (alpha + 2.0));
+}
+double beckmannToBlinn_d(double slope) {
+  return 2.0 / (slope * slope) - 2.0;
+}
+int main() {
+    float xf;
+    double xd;
+    int l;
+    l = scanf("%f", &xf);
+    l = scanf("%lf", &xd);
+    printf("%f\n",  fresnelReflectanceAtNormal_f(xf));
+    printf("%lf\n", fresnelReflectanceAtNormal_d(xd));
+    printf("%f\n",  blinToBeckmann_f(xf));
+    printf("%lf\n", blinToBeckmann_d(xd));
+    printf("%f\n",  beckmannToBlinn_f(xf));
+    printf("%lf\n", beckmannToBlinn_d(xd));
+}

real_world_dataset/dataset_info.json ADDED Viewed

	@@ -0,0 +1,236 @@

+{
+  "ardupilot": {
+    "w": {
+      "eqn": "2*pi*x",
+      "prefix": "mul INT+ 2 mul pi x",
+      "constants": {}
+    },
+    "degF_to_Kelvin": {
+      "eqn": "(x+459.67)*0.55556",
+      "prefix": "mul k0 add x k1",
+      "constants": {"k0": "0.55556", "k1": "459.67"}
+    },
+    "radians": {
+      "eqn": "x*pi/180",
+      "prefix": "mul k0 x",
+      "constants": {"k0": "pi/180"}
+    },
+    "degrees": {
+      "eqn": "x*180/pi",
+      "prefix": "mul k0 x",
+      "constants": {"k0": "180/pi"}
+    },
+    "sq": {
+      "eqn": "x**2",
+      "prefix": "pow x INT+ 2",
+      "constants": {}
+    },
+    "angle_to_accel": {
+      "eqn": "9.80665*tan(x*pi/180)",
+      "prefix": "mul k0 tan mul x k1",
+      "constants": {"k0": "9.80665", "k1": "pi/180"}
+    },
+    "accel_to_angle": {
+      "eqn": "atan(x/9.80665)*180/pi",
+      "prefix": "mul atan div x k0 k1",
+      "constants": {"k0": "9.80665", "k1": "180/pi"}
+    },
+    "tau": {
+      "eqn": "0.25*log(3*x**2 + 6*x + 1) - sqrt(6)/24*log((x+1-sqrt(2/3))/(x + 1 + sqrt(2/3)))",
+      "prefix": "sub mul div INT+ 1 INT+ 4 log add mul INT+ 3 pow x INT+ 2 add mul INT+ 6 x INT+ 1 mul k0 log div sub add x INT+ 1 k1 add x add INT+ 1 k1",
+      "constants": {"k0": "sqrt(6)/24", "k1": "sqrt(2/3)"}
+    }
+  },
+  "paparazzi": {
+    "isometric_latitude0": {
+      "eqn": "log(tan(pi/4+x/2))",
+      "prefix": "log tan add div pi INT+ 4 div x INT+ 2",
+      "constants": {}
+    },
+    "pprz_isa_pressure_of_altitude": {
+      "eqn": "101325*exp((-1/8434.667799)*x)",
+      "prefix": "mul k0 exp mul k1 x",
+      "constants": {"k0": "101325", "k1": "-1/8434.667799"}
+    },
+    "pprz_isa_altitude_of_pressure": {
+      "eqn": "8434.667799*log(101325/x)",
+      "prefix": "mul k0 log div k1 x",
+      "constants": {"k1": "101325", "k0": "8434.667799"}
+    },
+    "pprz_isa_pressure_of_height": {
+      "eqn": "0.8*101325*exp((-1/8434.667799)*x)",
+      "prefix": "mul k0 exp mul k1 x",
+      "constants": {"k0": "0.8*101325", "k1": "-1/8434.667799"}
+    },
+    "pprz_isa_height_of_pressure_full": {
+      "eqn": "(1-pow(x/(0.8*101325), 0.19029848))*288.15/0.0065",
+      "prefix": "mul sub INT+ 1 pow div x k0 k1 k2",
+      "constants": {"k0": "0.8*101325", "k1": "0.19029848", "k2": "288.15/0.0065"}
+    },
+    "pprz_isa_height_of_pressure": {
+      "eqn": "8434.667799*log(0.8*101325/x)",
+      "prefix": "mul k0 log div k1 x",
+      "constants": {"k1": "0.8*101325", "k0": "8434.667799"}
+    },
+    "pprz_isa_temperature_of_altitude": {
+      "eqn": "288.15 - 0.0065*x",
+      "prefix": "sub k0 mul k1 x",
+      "constants": {"k0": "288.15", "k1": "0.0065"}
+    },
+    "change_rep": {
+      "eqn": "pi/2-x",
+      "prefix": "sub div pi INT+ 2 x",
+      "constants": {}
+    },
+    "eas_from_dynamic_pressure": {
+      "eqn": "sqrt(x*2/1.225)",
+      "prefix": "pow mul x k0 div INT+ 1 INT+ 2",
+      "constants": {"k0": "2/1.225"}
+    },
+    "nmea_degree2radian": {
+      "eqn": "x*pi/180",
+      "prefix": "mul x k0",
+      "constants": {"k0": "pi/180"}
+    },
+    "nmea_radian2degree": {
+      "eqn": "x*180/pi",
+      "prefix": "mul x k0",
+      "constants": {"k0": "180/pi"}
+    }
+  },
+  "cleanflight": {
+    "invSqrt": {
+      "eqn": "1/sqrt(x)",
+      "prefix": "pow x div INT- 1 INT+ 2",
+      "constants": {}
+    },
+    "pressureToAltitude": {
+      "eqn": "(1-pow(x/101325, 0.190295))*4433000",
+      "prefix": "mul sub INT+ 1 pow div x k0 k1 k2",
+      "constants": {"k0": "101325", "k1": "0.190295", "k2": "4433000"}
+    },
+    "dynThrottle": {
+      "eqn": "x*(1-x*x/3)*1.5",
+      "prefix": "mul x mul sub INT+ 1 div pow x INT+ 2 INT+ 3 div INT+ 3 INT+ 2",
+      "constants": {}
+    },
+    "calculateAccZLowPassFilterRCTimeConstant": {
+      "eqn": "0.5/pi/x",
+      "prefix": "div k0 x",
+      "constants": {"k0": "0.5/pi"}
+    },
+    "calculateThrottleAngleScale": {
+      "eqn": "1800/pi*900/x",
+      "prefix": "div k0 x",
+      "constants": {"k0": "1800/pi*900"}
+    },
+    "sin_approx": {
+      "eqn": "sin(x)",
+      "prefix": "",
+      "constants": {}
+    },
+    "acos_approx": {
+      "eqn": "acos(x)",
+      "prefix": "",
+      "constants": {}
+    }
+  },
+  "nn_funcs": {
+    "logistic": {
+      "eqn": "1/(1+exp(-x))",
+      "prefix": "div INT+ 1 add INT+ 1 exp mul INT- 1 x",
+      "constants": {}
+    },
+    "rbf": {
+      "eqn": "exp(-(x-0.7328)**2/1.9983)",
+      "prefix": "exp mul INT- 1 div pow sub x k0 INT+ 2 k1",
+      "constants": {"k0": "0.7328", "k1": "1.9983"}
+    },
+    "multiquad": {
+      "eqn": "sqrt((x-0.881)**2+8.66172)",
+      "prefix": "pow add pow sub x k0 INT+ 2 k1 div INT+ 1 INT+ 2",
+      "constants": {"k0": "0.881", "k1": "8.66172"}
+    },
+    "invmultiquad": {
+      "eqn": "1/sqrt((x-0.881)**2+8.66172)",
+      "prefix": "pow add pow sub x k0 INT+ 2 k1 div INT- 1 INT+ 2",
+      "constants": {"k0": "0.881", "k1": "8.66172"}
+    },
+    "tanh": {
+      "eqn": "tanh(x)",
+      "prefix": "div sub exp x exp mul INT- 1 x add exp x exp mul INT- 1 x",
+      "constants": {}
+    },
+    "softplus": {
+      "eqn": "log(1+exp(x))",
+      "prefix": "log add INT+ 1 exp x",
+      "constants": {}
+    },
+    "silu": {
+      "eqn": "x/(1+exp(-x))",
+      "prefix": "div x add INT+ 1 exp mul INT- 1 x",
+      "constants": {}
+    }
+  },
+  "ntc_thermistor": {
+    "resistanceToKelvins": {
+      "eqn": "1/25+log(x/100000)/3950",
+      "prefix": "add div INT+ 1 INT+ 2 5 div log div x k0 k1",
+      "constants": {"k0": "100000", "k1": "3950"}
+    },
+    "readResistance": {
+      "eqn": "8000/(1024/x - 1)",
+      "prefix": "div k0 sub div k1 x INT+ 1",
+      "constants": {"k0": "8000", "k1": "1024"}
+    },
+    "celsiusToKelvins": {
+      "eqn": "x+273.15",
+      "prefix": "add x k0",
+      "constants": {"k0": "273.15"}
+    },
+    "kelvinsToCelsius": {
+      "eqn": "x-273.15",
+      "prefix": "sub x k0",
+      "constants": {"k0": "273.15"}
+    },
+    "celsiusToFahrenheit": {
+      "eqn": "x*1.8+32",
+      "prefix": "add mul x div INT+ 9 INT+ 5 INT+ 3 2",
+      "constants": {}
+    },
+    "kelvinsToFahrenheit": {
+      "eqn": "(x-273.15)*1.8+32",
+      "prefix": "add mul sub x k0 div INT+ 9 INT+ 5 INT+ 3 2",
+      "constants": {"k0": "273.15"}
+    }
+  },
+  "arduino_sensorkit": {
+    "calcAltitude": {
+      "eqn": "(1-(x/101325)**(1/5.25588))/0.0000225577",
+      "prefix": "div sub INT+ 1 pow div x k0 k1 k2",
+      "constants": {"k0": "101325", "k1": "1/5.25588", "k2": "0.0000225577"}
+    },
+    "convertCtoF": {
+      "eqn": "x*1.8+32",
+      "prefix": "add mul x div INT+ 9 INT+ 5 INT+ 3 2",
+      "constants": {}
+    }
+  },
+  "pathtracing": {
+    "fresnelReflectanceAtNormal": {
+      "eqn": "(1-x)**2/(1+x)**2",
+      "prefix": "pow div sub INT+ 1 x add INT+ 1 x INT+ 2",
+      "constants": {}
+    },
+    "blinToBeckmann": {
+      "eqn": "sqrt(2/(2+x))",
+      "prefix": "pow div INT+ 2 add x INT+ 2 div INT+ 1 INT+ 2",
+      "constants": {}
+    },
+    "beckmannToBlinn": {
+      "eqn": "2/x**2 - 2",
+      "prefix": "add div INT+ 2 mul x x INT- 2",
+      "constants": {}
+    }
+  }
+}

real_world_dataset/eval_dataset.py ADDED Viewed

	@@ -0,0 +1,100 @@

+import sympy as sp
+import numpy as np
+import warnings
+from sympy.abc import x
+import sys
+import json
+from tqdm import tqdm
+from remend.tools.parser import parse_prefix_to_sympy
+warnings.simplefilter("ignore")
+def percent(a, n):
+    """Format ``a`` as a percentage of ``n`` with one decimal place.
+
+    Returns ``"0.0%"`` when ``n`` is zero so that writing the summary for an
+    empty set of expressions does not raise ZeroDivisionError.
+    """
+    if n == 0:
+        return "0.0%"
+    return f"{a/n*100:0.1f}%"
+def do_eval_match(orig_expr, gen_expr):
+    """Numerically compare two sympy expressions in ``x``.
+
+    Both expressions are evaluated at the sample points
+    ``np.arange(0.2, 1, 0.01)``. A point is skipped when either value is not
+    finite (NaN, +inf or -inf). The expressions match when no compared point
+    has a relative error above 1e-5 and at least 5 points were compared.
+
+    Returns False when the expressions cannot be evaluated at all.
+    """
+    count = 0
+    try:
+        origl = sp.lambdify(x, orig_expr)
+        genl = sp.lambdify(x, gen_expr)
+        for v in np.arange(0.2, 1, 0.01):
+            o = origl(v)
+            g = genl(v)
+            # Skip points where either side is NaN or infinite. Testing only
+            # for +inf would let o == -inf through, where the relative error
+            # evaluates to NaN and the point would be counted as a match.
+            if not (np.isfinite(o) and np.isfinite(g)):
+                continue
+            if abs((o-g)/o) > 1e-5:
+                return False
+            count += 1
+    except Exception:
+        # Evaluation failures are treated as a mismatch; KeyboardInterrupt
+        # and SystemExit are deliberately not caught.
+        return False
+    return count >= 5
+if __name__ == "__main__":
+    import argparse
+    # Command-line interface: generated hypotheses, per-sample info, results path.
+    cli = argparse.ArgumentParser("Check generated expressions")
+    for flag, description in (
+        ("-g", "Generated expressions file"),
+        ("-i", "Info file"),
+        ("-r", "Results file"),
+    ):
+        cli.add_argument(flag, required=True, help=description)
+    args = cli.parse_args()
+    gens = []
+    with open(args.g, 'r') as genf, open(args.i) as infof:
+        for line in tqdm(genf, desc="Reading file"):
+            # Only lines starting with 'H' carry a generated expression.
+            if line[0] != 'H':
+                continue
+            fields = line.strip().split("\t")
+            sample_id = int(fields[0][2:])
+            tokens = fields[2].split(" ")
+            # Every 'H' line consumes the next JSON line of the info file.
+            info = json.loads(next(infof).strip())
+            if info["eqn"] == "":
+                continue
+            gens.append((sample_id, tokens, info))
+    parsed = []
+    matched = []
+    results = []
+    for n, toks, info in tqdm(gens, desc="Evaluating expressions"):
+        # The record is registered up front and filled in as the checks pass.
+        res = {"id": n, "parsed": False, "matched": False, "orig": "", "gen": ""}
+        results.append(res)
+        if "<<unk>>" in toks:
+            # Not parsed
+            continue
+        try:
+            gen_expr = parse_prefix_to_sympy(toks)
+        except Exception:
+            # Not parsed
+            continue
+        res["parsed"] = True
+        parsed.append(n)
+        # Replace the symbolic constants k<name> by the values from the info entry.
+        const = info["constants"]
+        substitutions = [(sp.Symbol("k"+key), value) for key, value in const.items()]
+        gen_expr = gen_expr.subs(substitutions)
+        orig_expr = sp.parse_expr(info["eqn"], local_dict={"x0":x})
+        res["orig"] = str(orig_expr)
+        res["gen"] = str(gen_expr)
+        if do_eval_match(orig_expr, gen_expr):
+            res["matched"] = True
+            matched.append(n)
+    with open(args.r, "w") as resf:
+        # One line per sample, then a blank line and the summary counts.
+        for res in results:
+            resf.write("{id} {parsed} {matched} \"{orig}\" \"{gen}\"\n".format(**res))
+        resf.write("\n")
+        N = len(gens)
+        print("Total", N, file=resf)
+        print("Parsed", len(parsed), percent(len(parsed), N), file=resf)
+        print("Matched", len(matched), percent(len(matched), N), file=resf)

real_world_dataset/generate.sh ADDED Viewed

	@@ -0,0 +1,21 @@

+#!/bin/bash
+# For each architecture: BPE-tokenize the collected assembly, generate
+# expressions with beam sizes 1 and 5, then evaluate both outputs.
+# All expansions are quoted so paths containing whitespace stay intact.
+ARCHS=( arm32 aarch64 x64 )
+TOKENIZERS="$HOME/projects/decode_ML/dlsym/tokenized"
+MODELS="$HOME/projects/decode_ML/dlsym/ablation"
+MODEL=base
+DS=dataset
+GEN="generated/${MODEL}"
+mkdir -p "${GEN}"
+for arch in "${ARCHS[@]}"
+do
+    tok="${TOKENIZERS}/${arch}/tokenized_dlsm_${arch}"
+    ckpt="${MODELS}/trained_${arch}_${MODEL}/checkpoint_best.pt"
+    tokenized="${GEN}/${arch}_tokenized.asm"
+    # Print the tokenization command before running it.
+    echo python3 -m remend.tools.bpe_apply -t "${tok}/asm_tokens.json" -i "${DS}/${arch}.asm" -o "${tokenized}"
+    python3 -m remend.tools.bpe_apply -t "${tok}/asm_tokens.json" -i "${DS}/${arch}.asm" -o "${tokenized}"
+    # Generation for both beam sizes; stderr of fairseq-interactive is discarded.
+    for beam in 1 5
+    do
+        fairseq-interactive "${tok}" --beam "${beam}" --path "${ckpt}" < "${tokenized}" > "${GEN}/${arch}_generated_beam${beam}.txt" 2>/dev/null
+    done
+    # Evaluation of both generated files against the per-architecture info file.
+    for beam in 1 5
+    do
+        python3 eval_dataset.py -g "${GEN}/${arch}_generated_beam${beam}.txt" -i "${DS}/${arch}.info" -r "${GEN}/${arch}_res_beam${beam}.txt"
+    done
+done

real_world_dataset/make.sh ADDED Viewed

	@@ -0,0 +1,38 @@

+#!/bin/bash
+# Build every source file for aarch64, x64 and arm32 at each optimization
+# level, producing both an ELF binary and an assembly listing per target.
+BUILD=build
+SRC=src
+A32=arm-linux-gnueabihf-
+A64=aarch64-linux-gnu-
+X64=
+mkdir -p ${BUILD}
+OPTS=(O0 O1 O2 O3)
+CFILES=(ardupilot paparazzi cleanflight linux_color_cvt ntc_thermistor arduino_sensorkit pathtracing)
+# Architecture names and their toolchain prefixes, index-aligned, in build order.
+ARCH_NAMES=(aarch64 x64 arm32)
+ARCH_PREFIXES=("${A64}" "${X64}" "${A32}")
+for opt in ${OPTS[@]}
+do
+    for f in ${CFILES[@]}
+    do
+        # Compile
+        for i in ${!ARCH_NAMES[@]}
+        do
+            ${ARCH_PREFIXES[$i]}gcc -${opt} -o ${BUILD}/${f}_${ARCH_NAMES[$i]}_${opt}.elf ${SRC}/${f}.c -lm
+        done
+        # Generate ASM source
+        for i in ${!ARCH_NAMES[@]}
+        do
+            ${ARCH_PREFIXES[$i]}gcc -S -${opt} -o ${BUILD}/${f}_${ARCH_NAMES[$i]}_${opt}.s ${SRC}/${f}.c
+        done
+    done
+    # Compile
+    for i in ${!ARCH_NAMES[@]}
+    do
+        ${ARCH_PREFIXES[$i]}gfortran -std=gnu -${opt} -o ${BUILD}/nn_funcs_${ARCH_NAMES[$i]}_${opt}.elf ${SRC}/nn_funcs.f90
+    done
+    # Generate ASM source
+    for i in ${!ARCH_NAMES[@]}
+    do
+        ${ARCH_PREFIXES[$i]}gfortran -S -std=gnu -${opt} -o ${BUILD}/nn_funcs_${ARCH_NAMES[$i]}_${opt}.s ${SRC}/nn_funcs.f90
+    done
+done

real_world_dataset/preprocess.sh ADDED Viewed

	@@ -0,0 +1,21 @@

+#!/bin/bash
+# For each architecture: BPE-tokenize the collected assembly, generate
+# expressions with beam sizes 1 and 5, then evaluate both outputs.
+# All expansions are quoted so paths containing whitespace stay intact.
+# NOTE(review): despite the file name, these steps also run generation and
+# evaluation -- confirm this is intended.
+ARCHS=( arm32 aarch64 x64 )
+TOKENIZERS="$HOME/projects/decode_ML/dlsym/tokenized"
+MODELS="$HOME/projects/decode_ML/dlsym/ablation"
+MODEL=base
+DS=dataset
+GEN="generated/${MODEL}"
+mkdir -p "${GEN}"
+for arch in "${ARCHS[@]}"
+do
+    tok="${TOKENIZERS}/${arch}/tokenized_dlsm_${arch}"
+    ckpt="${MODELS}/trained_${arch}_${MODEL}/checkpoint_best.pt"
+    tokenized="${GEN}/${arch}_tokenized.asm"
+    # Print the tokenization command before running it.
+    echo python3 -m remend.tools.bpe_apply -t "${tok}/asm_tokens.json" -i "${DS}/${arch}.asm" -o "${tokenized}"
+    python3 -m remend.tools.bpe_apply -t "${tok}/asm_tokens.json" -i "${DS}/${arch}.asm" -o "${tokenized}"
+    # Generation for both beam sizes; stderr of fairseq-interactive is discarded.
+    for beam in 1 5
+    do
+        fairseq-interactive "${tok}" --beam "${beam}" --path "${ckpt}" < "${tokenized}" > "${GEN}/${arch}_generated_beam${beam}.txt" 2>/dev/null
+    done
+    # Evaluation of both generated files against the per-architecture info file.
+    for beam in 1 5
+    do
+        python3 eval_dataset.py -g "${GEN}/${arch}_generated_beam${beam}.txt" -i "${DS}/${arch}.info" -r "${GEN}/${arch}_res_beam${beam}.txt"
+    done
+done

real_world_dataset/related_evals/run_btc.sh ADDED Viewed

	@@ -0,0 +1,3 @@


+#!/bin/bash
+# Run fairseq-interactive (beam 10, 4 best hypotheses, src -> tgt) on
+# btc_asm_tok.txt and write the output to btc_predictions.txt.
+
+fairseq-interactive c/c_xA/data-bin/data.toked.src-tgt/ --beam 10 --nbest 4 --source-lang src --target-lang tgt --path c/c_xA/checkpoint_best.pt < btc_asm_tok.txt > btc_predictions.txt

real_world_dataset/related_evals/run_nova.py ADDED Viewed

	@@ -0,0 +1,57 @@

+import json
+import torch
+from tqdm import tqdm
+from transformers import AutoTokenizer
+from modeling_nova import NovaTokenizer, NovaForCausalLM
+import time
+# Load the pretrained tokenizer for the Nova checkpoint.
+tokenizer = AutoTokenizer.from_pretrained('lt-asset/nova-6.7b-bcr', trust_remote_code=True)
+# Report the vocabulary size once: when not running distributed, or on rank 0.
+if not torch.distributed.is_initialized() or torch.distributed.get_rank() == 0:
+    print('Vocabulary:', len(tokenizer.get_vocab()))    # 32280
+# Reuse the end-of-sequence token as the padding token.
+tokenizer.pad_token = tokenizer.eos_token
+tokenizer.pad_token_id = tokenizer.eos_token_id
+nova_tokenizer = NovaTokenizer(tokenizer)
+# Load the model weights and switch to evaluation mode.
+model = NovaForCausalLM.from_pretrained('lt-asset/nova-6.7b-bcr').eval()
+# Assembly dataset, indexed below as dataset[prog][func] -> assembly text.
+with open("../slade/slade_cps_dataset.json", "r") as f:
+    dataset = json.load(f)
+CFI_START=".cfi_startproc"
+CFI_END=".cfi_endproc"
+def process_asm(asm):
+    """Turn a function's assembly listing into a prompt and its char types.
+
+    The text between ``.cfi_startproc`` and ``.cfi_endproc`` is split on
+    ``"\\n\\t"``; pieces containing ``.cfi_`` and blank pieces are dropped, and
+    each remaining piece gets a ``<label-N>`` suffix (N starting at 1).
+
+    Returns a tuple ``(prompt, char_types)`` where ``char_types`` has one
+    character per prompt character: "1" for the assembly part and "0" for
+    the surrounding prompt text.
+    """
+    # Start right after the opening directive when present, else at the beginning.
+    if CFI_START in asm:
+        start = asm.index(CFI_START) + len(CFI_START)
+    else:
+        start = 0
+    # Without a closing directive keep everything up to the end of the text;
+    # an end offset of 0 would slice away the whole function body.
+    if CFI_END in asm:
+        end = asm.index(CFI_END)
+    else:
+        end = len(asm)
+    prompt_before = '# This is the assembly code:\n<func0>:\n'
+    prompt_after = '\nWhat is the source code?\n'
+    lines = [
+        line for line in asm[start:end].split("\n\t")
+        if ".cfi_" not in line and line.strip() != ""
+    ]
+    body = "\n".join(f"{line}\t<label-{i+1}>" for i, line in enumerate(lines))
+    char_types = ("0" * len(prompt_before)) + ("1" * len(body)) + ("0" * len(prompt_after))
+    return prompt_before + body + prompt_after, char_types
+# Collect (program, function, assembly) triples for programs whose name contains "_x64_".
+asms = []
+for prog in dataset:
+    for func, asm in dataset[prog].items():
+        if "_x64_" in prog:
+            asms.append((prog, func, asm))
+with open("nova_predictions.txt", "w") as predf:
+    for prog, func, asm in tqdm(asms):
+        # The measured time covers prompt construction, encoding and generation.
+        start = time.time()
+        prompt, char_types = process_asm(asm)
+        encoded = nova_tokenizer.encode(prompt, "", char_types)
+        input_ids = torch.LongTensor(encoded['input_ids'].tolist()).unsqueeze(0)
+        nova_attention_mask = torch.LongTensor(encoded['nova_attention_mask']).unsqueeze(0)
+        no_mask_id = torch.LongTensor([encoded['no_mask_idx']])
+        generation_args = dict(
+            inputs=input_ids, max_new_tokens=512, temperature=0.2, top_p=0.95, num_return_sequences=3,
+            do_sample=True, nova_attention_mask=nova_attention_mask, no_mask_idx=no_mask_id,
+            pad_token_id=tokenizer.pad_token_id, eos_token_id=tokenizer.eos_token_id
+        )
+        outputs = model.generate(**generation_args)
+        elapsed = time.time() - start
+        predf.write(f"{prog} {func} time= {elapsed}\n")
+        # Decode only the newly generated tokens, one tab-indented line per sample.
+        prompt_len = input_ids.size(1)
+        for output in outputs:
+            outc = tokenizer.decode(output[prompt_len:], skip_special_tokens=True, clean_up_tokenization_spaces=True)
+            predf.write(f"\t{outc}\n")

real_world_dataset/related_evals/run_slade.py ADDED Viewed

	@@ -0,0 +1,149 @@

+import os
+import re
+import json
+import torch
+import itertools as it
+from tokenizers import Tokenizer
+from transformers import BartForConditionalGeneration
+import time
+from tqdm import tqdm
+class InferenceDataProcessor:
+    """Build model inputs from assembly text and clean up decoded model output.
+
+    Each ``tokenize_*`` method wraps the sample in a fixed template of special
+    tokens, normalizes it with the tokenizer's normalizer and returns the
+    encoded token ids.
+    """
+    def __init__(self, tokenizer: Tokenizer):
+        self.tokenizer = tokenizer
+    def _encode(self, text):
+        # Normalize, then encode; shared by all tokenize_* methods.
+        return self.tokenizer.encode(self.tokenizer.normalizer.normalize_str(text)).ids
+    def tokenize_opt3_to_c(self, one_sample):  # one_sample must be a function definition (and ONLY a function definition)
+        # Add some specific special tokens by hand:
+        return self._encode(f"<c> <mask:0> </c> <intel> <opt3> {one_sample} </opt> </intel>")
+    def tokenize_s_to_c(self, one_sample):  # one_sample must be a function definition (and ONLY a function definition)
+        # Add some specific special tokens by hand:
+        return self._encode(f"<c> <mask:0> </c> <intel> {one_sample} </intel>")
+    def tokenize_arm_to_c(self, one_sample):  # one_sample must be a function definition (and ONLY a function definition)
+        # Add some specific special tokens by hand:
+        return self._encode(f"<c> <mask:0> </c> <arm> {one_sample} </arm>")
+    def tokenize_arm_opt3_to_c(self, one_sample):  # one_sample must be a function definition (and ONLY a function definition)
+        # Add some specific special tokens by hand:
+        return self._encode(f"<c> <mask:0> </c> <arm> <opt3> {one_sample} </opt> </arm>")
+    def prepare(self, asm, pair):
+        """Encode ``asm`` with the template selected by ``pair``.
+
+        Returns the token ids, or None when ``pair`` is not one of
+        's_c-c', 'opt3_c-c', 'arm_c-c' or 'arm_opt3_c-c'.
+        """
+        if pair == 's_c-c':
+            return self.tokenize_s_to_c(asm)
+        elif pair == 'opt3_c-c':
+            return self.tokenize_opt3_to_c(asm)
+        elif pair == 'arm_c-c':
+            return self.tokenize_arm_to_c(asm)
+        elif pair == 'arm_opt3_c-c':
+            return self.tokenize_arm_opt3_to_c(asm)
+        return None
+    def detokenize(self, one_sample, remove_mask=True):
+        """Decode token ids to text and strip special tokens by hand.
+
+        ``remove_mask`` controls whether '<mask:0>' is removed as well.
+        """
+        # Workaround: decode() is called without skipping special tokens and the
+        # markers are removed by string replacement below. The order of the
+        # replacements matters, so the sequence is kept as is.
+        detok = self.tokenizer.decode(one_sample, skip_special_tokens=False)
+        detok = detok.replace('<eol> ', '\n').replace('<eol>', '\n').replace('<tab> ', '\t').replace('<tab>', '\t')
+        if remove_mask:
+            detok = detok.replace('<mask:0>', '')
+        detok = detok.replace('<pad>', '').replace('<s>', '').replace('</s>', '')
+        # Raw strings keep the same pattern without the invalid "\w" escape.
+        detok = re.sub(r'# (/\w+)*', '', detok)
+        detok = detok.replace('0x ', '0x').replace(' #', '').replace('return', 'return ').replace('return  ', 'return ')
+        detok = detok.replace('static', '').replace('inline', '')
+        detok = re.sub(r'# (/\w+)*', '', detok)
+        detok = detok.replace('__attribute__((used))', '')
+        return detok
+# Function table, used below as FUNCS[binary][func]["eqn"].
+with open("../dataset.json") as f:
+    FUNCS = json.load(f)
+DTYPES = ["f", "d"]
+DATASET = "slade_cps_dataset.json"
+PREDICTIONS = "slade_predictions.json"
+SLADE_PATH = "."
+# Exported checkpoint directory for each translation direction.
+MODELS = {
+    'opt3_c-c': 'output/export-new_train-2023-04-28-2313-1dff-748d-checkpoint_best', # x86-O3
+    's_c-c': 'output/export-new_train-2023-04-09-1854-b799-06dc-checkpoint_best',  # x86-O0
+    'arm_opt3_c-c': 'output/export-new_train-2023-05-06-0743-1dff-ae24-checkpoint_best',  # ARM O3
+    'arm_c-c': 'output/export-new_train-2023-04-29-0149-1dff-7342-checkpoint_best'  # ARM O0
+}
+# Generation settings passed to model.generate().
+BEAM=5
+NBEST=1
+EARLY_STOPPING = True
+LENGTH_PENALTY = 1.0
+MIN_LENGTH = 1
+MAX_NEW_TOKENS = 512
+# DIRECTION = "s_c-c" # x86-O0
+# DIRECTION = 'opt3_c-c' # x86-O3
+# DIRECTION = 'arm_c-c' # ARM-O0
+DIRECTION = 'arm_opt3_c-c' # ARM-O3
+# Optimization levels and architectures evaluated for each direction.
+_DIRECTION_TARGETS = {
+    "s_c-c": ([0], ["x64"]),
+    "opt3_c-c": ([1, 2], ["x64"]),
+    "arm_c-c": ([0], ["aarch64"]),
+    "arm_opt3_c-c": ([1, 2], ["aarch64"]),
+}
+if DIRECTION in _DIRECTION_TARGETS:
+    OPTS, ARCHS = _DIRECTION_TARGETS[DIRECTION]
+# Load the tokenizer and model of the selected direction; the model is put in eval mode.
+model_path = os.path.join(SLADE_PATH, MODELS[DIRECTION])
+tokenizer_file = os.path.join(model_path, 'tokenizer.json')
+tok = Tokenizer.from_file(tokenizer_file)
+model = BartForConditionalGeneration.from_pretrained(model_path).eval()
+data_processor = InferenceDataProcessor(tokenizer=tok)
+def predict_one(asm, pair):
+    """Generate source-code hypotheses for one assembly function.
+
+    Returns a list of detokenized hypotheses, or ``['']`` (after printing
+    'Too long') when the encoded input exceeds the model's
+    ``max_position_embeddings``.
+    """
+    token_ids = data_processor.prepare(asm, pair)
+    if len(token_ids) > model.config.max_position_embeddings:
+        print('Too long')
+        return ['']
+    # A single sample is sent as a batch of size one.
+    generated = model.generate(
+        torch.tensor(token_ids).unsqueeze(0),
+        max_new_tokens=MAX_NEW_TOKENS,
+        num_beams=BEAM,
+        num_return_sequences=NBEST,
+        early_stopping=EARLY_STOPPING,
+        length_penalty=LENGTH_PENALTY,
+        min_length=MIN_LENGTH,
+    )
+    return [data_processor.detokenize(seq.cpu().tolist()) for seq in generated]
+# print(predict_one(ASM, DIRECTION))
+with open(DATASET) as f:
+    dataset = json.load(f)
+# Predictions already stored in the file are kept and extended. When the file
+# does not exist yet (first run), start from an empty mapping instead of
+# failing with FileNotFoundError.
+try:
+    with open(PREDICTIONS) as f:
+        predictions = json.load(f)
+except FileNotFoundError:
+    predictions = {}
+# Progress total derived from the function table rather than a fixed count.
+total = sum(len(FUNCS[binary]) for binary in FUNCS) * len(ARCHS) * len(DTYPES) * len(OPTS)
+with tqdm(total=total, desc="Running") as t:
+    for arch, binary, opt in it.product(ARCHS, FUNCS.keys(), OPTS):
+        name = f"{binary}_{arch}_O{opt}"
+        for func, dtype in it.product(FUNCS[binary].keys(), DTYPES):
+            fname = f"{func}_{dtype}"
+            if fname not in dataset[name]:
+                print("Not found!!", fname, name)
+                continue
+            asm = dataset[name][fname]
+            # Time only the prediction call.
+            start = time.time()
+            pred = predict_one(asm, DIRECTION)
+            end = time.time()
+            # "pass" is always stored as False here.
+            predictions[f"{name}_{fname}"] = {"eq": FUNCS[binary][func]["eqn"], "pred": pred[0], "pass": False, "time": end-start}
+            t.update()
+with open(PREDICTIONS, "w") as f:
+    json.dump(predictions, f, indent=2)