wenbemi committed on
Commit
80b6b64
·
verified ·
1 Parent(s): a225344

Update chat_a.py

Browse files
Files changed (1) hide show
  1. chat_a.py +12 -8
chat_a.py CHANGED
@@ -2,7 +2,7 @@
2
  # coding: utf-8
3
 
4
  # In[10]:
5
- import os, io, pandas as pd
6
  from huggingface_hub import hf_hub_download
7
  import pandas as pd
8
  import torch
@@ -33,17 +33,21 @@ def _read_csv_bytes(b: bytes) -> pd.DataFrame:
33
  except UnicodeDecodeError:
34
  return pd.read_csv(io.BytesIO(b), encoding="cp949")
35
 
36
- def load_csv_smart(local_path: str, hub_filename: str,
37
- repo_id: str = HF_DATASET_REPO, repo_type: str = "dataset",
38
- revision: str = HF_DATASET_REV) -> pd.DataFrame:
39
- # 1) 로컬 우선
 
 
 
40
  if os.path.exists(local_path):
41
  with open(local_path, "rb") as f:
42
  data = f.read()
43
  if not _is_pointer_bytes(data):
44
- return _read_csv_bytes(data)
45
- # 포인터면 허브로 폴백
46
- # 2) 허브 다운로드
 
47
  cached = hf_hub_download(repo_id=repo_id, filename=hub_filename,
48
  repo_type=repo_type, revision=revision)
49
  try:
 
2
  # coding: utf-8
3
 
4
  # In[10]:
5
+ import os, io
6
  from huggingface_hub import hf_hub_download
7
  import pandas as pd
8
  import torch
 
33
  except UnicodeDecodeError:
34
  return pd.read_csv(io.BytesIO(b), encoding="cp949")
35
 
36
+ def load_csv_smart(local_path: str,
37
+ hub_filename: str | None = None,
38
+ repo_id: str = HF_DATASET_REPO,
39
+ repo_type: str = "dataset",
40
+ revision: str = HF_DATASET_REV):
41
+ if hub_filename is None:
42
+ hub_filename = os.path.basename(local_path)
43
  if os.path.exists(local_path):
44
  with open(local_path, "rb") as f:
45
  data = f.read()
46
  if not _is_pointer_bytes(data):
47
+ try:
48
+ return pd.read_csv(io.BytesIO(data), encoding="utf-8")
49
+ except UnicodeDecodeError:
50
+ return pd.read_csv(io.BytesIO(data), encoding="cp949")
51
  cached = hf_hub_download(repo_id=repo_id, filename=hub_filename,
52
  repo_type=repo_type, revision=revision)
53
  try: