Spaces:

elvis-hf
/

regularization

Sleeping

App Files Community

joel-woodfield committed on Dec 7, 2025

Commit

0562a88

1 Parent(s): c8ba6d6

Add ability to change how x points are selected

Browse files

Files changed (1) hide show

dataset.py +35 -17

dataset.py CHANGED Viewed

@@ -26,21 +26,18 @@ def get_function(function, x1lim, x2lim, nsample=100):
     return mesh_x1, mesh_x2, y
-def get_data_points(function, x1lim, x2lim, nsample=10, sigma=0., seed=0):
-    num_points_to_generate = 100
-    if nsample > num_points_to_generate:
-        raise ValueError(f"nsample too large, limit to {num_points_to_generate}")
-    rng = np.random.default_rng(seed)
-    x1 = rng.uniform(x1lim[0], x1lim[1], size=num_points_to_generate)
-    x1 = x1[:nsample]
-    # Not sure why I put sorting here...
-    # x1 = np.sort(x1)
-    x2 = rng.uniform(x2lim[0], x2lim[1], size=num_points_to_generate)
-    x2 = x2[:nsample]
-    # Not sure why I put sorting here...
-    # x2 = np.sort(x2)
     rng = np.random.default_rng(seed)
     noise = sigma * rng.standard_normal(nsample)
@@ -58,11 +55,12 @@ class Dataset:
     def __init__(
         self,
         mode: str = "generate",
-        function: str = "25 * x1 + 50 * x2",
         x1lim: tuple[float, float] = (-1, 1),
         x2lim: tuple[float, float] = (-1, 1),
         nsample: int = 100,
-        sigma: float = 0.0,
         seed: int = 0,
         csv_path: str | None = None,
     ):
@@ -73,6 +71,7 @@ class Dataset:
         self.x2lim = x2lim
         self.nsample = nsample
         self.sigma = sigma
         self.seed = seed
         self.csv_path = csv_path
@@ -95,6 +94,7 @@ class Dataset:
                 x2lim=self.x2lim,
                 nsample=self.nsample,
                 sigma=self.sigma,
                 seed=self.seed,
             )
@@ -121,6 +121,7 @@ class Dataset:
             x2lim=kwargs.get("x2lim", self.x2lim),
             nsample=kwargs.get("nsample", self.nsample),
             sigma=kwargs.get("sigma", self.sigma),
             seed=kwargs.get("seed", self.seed),
             csv_path=kwargs.get("csv_path", self.csv_path),
         )
@@ -142,6 +143,7 @@ class Dataset:
                 self._safe_hash(self.x2lim[1]),
                 self.nsample,
                 self.sigma,
                 self.seed,
                 self.csv_path,
             )
@@ -201,6 +203,12 @@ class DatasetView:
         return state
     def upload_csv(self, file, state):
         try:
             state = state.update(
@@ -280,6 +288,11 @@ class DatasetView:
                     value=f"{options.x2lim[0]}, {options.x2lim[1]}",
                     interactive=True,
                 )
             with gr.Row():
                 sigma = gr.Number(
@@ -320,6 +333,11 @@ class DatasetView:
             inputs=[x2_textbox, state],
             outputs=[state],
         )
         sigma.submit(
             lambda sig, s: s.update(sigma=sig),
             inputs=[sigma, state],

     return mesh_x1, mesh_x2, y
+def get_data_points(function, x1lim, x2lim, nsample=10, sigma=0., random_x=False, seed=0):
+    if random_x:
+        rng = np.random.default_rng(seed)
+        x1 = rng.uniform(x1lim[0], x1lim[1], size=nsample)
+        x2 = rng.uniform(x2lim[0], x2lim[1], size=nsample)
+    else:
+        size = int(np.ceil(np.sqrt(nsample)))
+        x1 = np.linspace(x1lim[0], x1lim[1], size)
+        x2 = np.linspace(x2lim[0], x2lim[1], size)
+        x1, x2 = np.meshgrid(x1, x2)
+        x1 = x1.ravel()[:nsample]
+        x2 = x2.ravel()[:nsample]
     rng = np.random.default_rng(seed)
     noise = sigma * rng.standard_normal(nsample)
     def __init__(
         self,
         mode: str = "generate",
+        function: str = "25 * x1 + 30 * x2",
         x1lim: tuple[float, float] = (-1, 1),
         x2lim: tuple[float, float] = (-1, 1),
         nsample: int = 100,
+        sigma: float = 0.1,
+        random_x: bool = False,
         seed: int = 0,
         csv_path: str | None = None,
     ):
         self.x2lim = x2lim
         self.nsample = nsample
         self.sigma = sigma
+        self.random_x = random_x
         self.seed = seed
         self.csv_path = csv_path
                 x2lim=self.x2lim,
                 nsample=self.nsample,
                 sigma=self.sigma,
+                random_x=self.random_x,
                 seed=self.seed,
             )
             x2lim=kwargs.get("x2lim", self.x2lim),
             nsample=kwargs.get("nsample", self.nsample),
             sigma=kwargs.get("sigma", self.sigma),
+            random_x=kwargs.get("random_x", self.random_x),
             seed=kwargs.get("seed", self.seed),
             csv_path=kwargs.get("csv_path", self.csv_path),
         )
                 self._safe_hash(self.x2lim[1]),
                 self.nsample,
                 self.sigma,
+                self.random_x,
                 self.seed,
                 self.csv_path,
             )
         return state
+    def update_x_selection_method(self, method: str, state: gr.State):
+        random_x = method == "Uniformly sampled"
+        print("Updating random_x to", random_x)
+        state = state.update(random_x=random_x)
+        return state
     def upload_csv(self, file, state):
         try:
             state = state.update(
                     value=f"{options.x2lim[0]}, {options.x2lim[1]}",
                     interactive=True,
                 )
+                x_selection_method = gr.Radio(
+                    label="How to select x points",
+                    choices=["Evenly spaced", "Uniformly sampled"],
+                    value="Evenly spaced",
+                )
             with gr.Row():
                 sigma = gr.Number(
             inputs=[x2_textbox, state],
             outputs=[state],
         )
+        x_selection_method.change(
+            fn=self.update_x_selection_method,
+            inputs=[x_selection_method, state],
+            outputs=[state],
+        )
         sigma.submit(
             lambda sig, s: s.update(sigma=sig),
             inputs=[sigma, state],