ahuang11 committed on
Commit
219309c
·
1 Parent(s): 95438b1

Try to download manually

Browse files
Files changed (1) hide show
  1. app.py +33 -8
app.py CHANGED
@@ -1,10 +1,15 @@
 
 
 
 
1
  import cartopy.crs as ccrs
2
  import fugue.api as fa
3
  import geopandas as gpd
4
  import geoviews as gv
5
  import panel as pn
 
6
  import pyarrow as pa
7
- from datasets import load_dataset, Split
8
  from holoviews.streams import RangeXY
9
  from shapely import wkt
10
 
@@ -34,17 +39,31 @@ INTRO = """
34
  and all their supporting dependencies.
35
  """
36
 
 
 
 
37
  QUERY_FMT = """
38
- df = CREATE USING load_hf(path="ahuang11/tiger_layer_edges")
39
  df_sel = SELECT STATEFP, COUNTYFP, FULLNAME, geometry \
40
  FROM df WHERE FULLNAME == '{{name}}'
41
  """
42
 
43
 
44
- def load_hf(path:str) -> pa.Table:
45
- dataset = load_dataset(path, split=Split.ALL).data
46
- table = pa.Table.from_batches(dataset.to_batches())
47
- return table
 
 
 
 
 
 
 
 
 
 
 
48
 
49
 
50
  class MapnStreets:
@@ -66,6 +85,7 @@ class MapnStreets:
66
  pn.state.onload(self.onload)
67
 
68
  def onload(self):
 
69
  self.name_input.param.trigger("value")
70
 
71
  range_xy = RangeXY()
@@ -92,14 +112,19 @@ class MapnStreets:
92
  try:
93
  name = name.strip()
94
  self.holoviews_pane.loading = True
95
- query_fmt = QUERY_FMT
96
  if "*" in name or "%" in name:
97
  name = name.replace("*", "%")
98
  query_fmt = query_fmt.replace("==", "LIKE")
99
  if name == "%":
100
  return
101
  df = fa.as_pandas(
102
- fa.fugue_sql(query_fmt, name=name, engine="duckdb", as_local=True)
 
 
 
 
 
 
103
  )
104
  self.gdf = self.serialize_geom(df)
105
  county_gdf = self.gdf.drop_duplicates(
 
1
+ import os
2
+ from pathlib import Path
3
+ from urllib.request import urlretrieve
4
+
5
  import cartopy.crs as ccrs
6
  import fugue.api as fa
7
  import geopandas as gpd
8
  import geoviews as gv
9
  import panel as pn
10
+ import pandas as pd
11
  import pyarrow as pa
12
+ from datasets import get_dataset_infos
13
  from holoviews.streams import RangeXY
14
  from shapely import wkt
15
 
 
39
  and all their supporting dependencies.
40
  """
41
 
42
+ DATA_DIR = Path.home() / ".cache" / "huggingface" / "datasets"
43
+ DATA_PATH = DATA_DIR / "edges.parquet"
44
+
45
  QUERY_FMT = """
46
+ df = LOAD_DATASET({{path}})
47
  df_sel = SELECT STATEFP, COUNTYFP, FULLNAME, geometry \
48
  FROM df WHERE FULLNAME == '{{name}}'
49
  """
50
 
51
 
52
def download_hf(path: str, **kwargs):
    """Download the data files of a Hugging Face dataset into ``DATA_PATH``.

    The file list is taken from the ``download_checksums`` keys of the first
    dataset info returned by ``get_dataset_infos(path)``; each key is passed
    to ``urlretrieve`` as-is (presumably a download URL -- confirm against
    the dataset metadata). Every file is stored under ``DATA_PATH`` using the
    basename of its key, and a sibling ``.done`` marker is created after a
    successful download so later calls skip files that were already fetched.

    Args:
        path: Dataset identifier handed to ``get_dataset_infos``.
        **kwargs: Forwarded unchanged to ``fa.out_transform`` (for example to
            select an execution engine).
    """
    info = list(get_dataset_infos(path).values())[0]
    file_df = pd.DataFrame({"file": list(info.download_checksums.keys())})
    # ``pathlib.Path`` has no ``makedirs`` method (that is ``os.makedirs``);
    # ``mkdir(parents=True)`` creates the directory and any missing ancestors.
    DATA_PATH.mkdir(parents=True, exist_ok=True)

    def _download(df: pd.DataFrame) -> None:
        # Runs once per partition handed over by ``fa.out_transform``.
        for file in df["file"]:
            file_name = os.path.basename(file)
            data_path = DATA_PATH / file_name
            # The marker replaces the file's suffix (e.g. ``x.parquet`` ->
            # ``x.done``) and is only written after ``urlretrieve`` returns,
            # so an interrupted download is retried on the next call.
            done_path = data_path.with_suffix(".done")
            if not done_path.exists():
                urlretrieve(file, data_path)
                done_path.touch()

    fa.out_transform(file_df, _download, **kwargs)
67
 
68
 
69
  class MapnStreets:
 
85
  pn.state.onload(self.onload)
86
 
87
  def onload(self):
88
+ download_hf("ahuang11/tiger_layer_edges")
89
  self.name_input.param.trigger("value")
90
 
91
  range_xy = RangeXY()
 
112
  try:
113
  name = name.strip()
114
  self.holoviews_pane.loading = True
 
115
  if "*" in name or "%" in name:
116
  name = name.replace("*", "%")
117
  query_fmt = query_fmt.replace("==", "LIKE")
118
  if name == "%":
119
  return
120
  df = fa.as_pandas(
121
+ fa.fugue_sql(
122
+ query_fmt,
123
+ data_path=str(DATA_PATH.absolute()),
124
+ name=name,
125
+ engine="duckdb",
126
+ as_local=True,
127
+ )
128
  )
129
  self.gdf = self.serialize_geom(df)
130
  county_gdf = self.gdf.drop_duplicates(