Ilia Tambovtsev committed on
Commit
0c02234
·
1 Parent(s): 364b978

feat: implement referring to sheet by name

Browse files
Files changed (1) hide show
  1. src/config/spreadsheets.py +32 -7
src/config/spreadsheets.py CHANGED
@@ -15,18 +15,43 @@ logger = logging.getLogger(__name__)
15
 
16
 
17
  def load_spreadsheet(
18
- sheet_id: Optional[str] = None, gid: Optional[str] = None
 
19
  ) -> pd.DataFrame:
 
 
 
 
 
 
 
 
 
 
 
 
20
  if sheet_id is None:
21
  load_dotenv()
22
  sheet_id = os.environ.get("BENCHMARK_SPREADSHEET_ID")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
- csv_load_url = (
25
- f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv"
26
- )
27
- if gid is not None:
28
- csv_load_url = f"{csv_load_url}&gid={gid}"
29
- df = pd.read_csv(csv_load_url)
30
  return df
31
 
32
 
 
15
 
16
 
17
  def load_spreadsheet(
18
+ sheet_id: Optional[str] = None,
19
+ gid: Optional[Union[str, int]] = None
20
  ) -> pd.DataFrame:
21
+ """Load data from Google Spreadsheet.
22
+
23
+ Args:
24
+ sheet_id: Spreadsheet ID. If None, loads from BENCHMARK_SPREADSHEET_ID env var
25
+ gid: Sheet identifier. Can be either:
26
+ - Sheet ID (numeric)
27
+ - Sheet name (string)
28
+ If None, loads the first sheet
29
+
30
+ Returns:
31
+ DataFrame with loaded data
32
+ """
33
  if sheet_id is None:
34
  load_dotenv()
35
  sheet_id = os.environ.get("BENCHMARK_SPREADSHEET_ID")
36
+ if not sheet_id:
37
+ raise ValueError("No spreadsheet ID provided")
38
+
39
+ logger.info(f"Loading questions from spreadsheet ({sheet_id[:15]}...)/{gid}")
40
+ # Check if gid is numeric (sheet ID) or string (sheet name)
41
+ if gid is None or str(gid).isdigit():
42
+ # Use CSV export URL for numeric gid
43
+ csv_load_url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv"
44
+ if gid is not None:
45
+ csv_load_url = f"{csv_load_url}&gid={gid}"
46
+ df = pd.read_csv(csv_load_url)
47
+ else:
48
+ # Load by sheet name using gspread_pandas
49
+ google_config_dir = Config().navigator.root
50
+ google_config = get_config(google_config_dir, "google_config.json")
51
+ client = Client(config=google_config)
52
+ spread = Spread(sheet_id, client=client)
53
+ df = spread.sheet_to_df(sheet=str(gid), index=False)
54
 
 
 
 
 
 
 
55
  return df
56
 
57