github-actions[bot] committed on
Commit
bf63ef4
·
1 Parent(s): 61172af

Auto-sync from demo at Tue Dec 23 07:51:53 UTC 2025

Browse files
Files changed (1) hide show
  1. graphgen/operators/read/read.py +4 -4
graphgen/operators/read/read.py CHANGED
@@ -50,7 +50,7 @@ def _build_reader(suffix: str, cache_dir: str | None, **reader_kwargs):
50
  def read(
51
  input_path: Union[str, List[str]],
52
  allowed_suffix: Optional[List[str]] = None,
53
- cache_dir: Optional[str] = "cache",
54
  parallelism: int = 4,
55
  recursive: bool = True,
56
  **reader_kwargs: Any,
@@ -60,7 +60,7 @@ def read(
60
 
61
  :param input_path: File or directory path(s) to read from
62
  :param allowed_suffix: List of allowed file suffixes (e.g., ['pdf', 'txt'])
63
- :param cache_dir: Directory to cache intermediate files (PDF processing)
64
  :param parallelism: Number of parallel workers
65
  :param recursive: Whether to scan directories recursively
66
  :param reader_kwargs: Additional kwargs passed to readers
@@ -70,7 +70,7 @@ def read(
70
  # 1. Scan all paths to discover files
71
  logger.info("[READ] Scanning paths: %s", input_path)
72
  scanner = ParallelFileScanner(
73
- cache_dir=cache_dir,
74
  allowed_suffix=allowed_suffix,
75
  rescan=False,
76
  max_workers=parallelism if parallelism > 0 else 1,
@@ -100,7 +100,7 @@ def read(
100
  # 3. Create read tasks
101
  read_tasks = []
102
  for suffix, file_paths in files_by_suffix.items():
103
- reader = _build_reader(suffix, cache_dir, **reader_kwargs)
104
  ds = reader.read(file_paths)
105
  read_tasks.append(ds)
106
 
 
50
  def read(
51
  input_path: Union[str, List[str]],
52
  allowed_suffix: Optional[List[str]] = None,
53
+ working_dir: Optional[str] = "cache",
54
  parallelism: int = 4,
55
  recursive: bool = True,
56
  **reader_kwargs: Any,
 
60
 
61
  :param input_path: File or directory path(s) to read from
62
  :param allowed_suffix: List of allowed file suffixes (e.g., ['pdf', 'txt'])
63
+ :param working_dir: Directory to cache intermediate files (PDF processing)
64
  :param parallelism: Number of parallel workers
65
  :param recursive: Whether to scan directories recursively
66
  :param reader_kwargs: Additional kwargs passed to readers
 
70
  # 1. Scan all paths to discover files
71
  logger.info("[READ] Scanning paths: %s", input_path)
72
  scanner = ParallelFileScanner(
73
+ cache_dir=working_dir,
74
  allowed_suffix=allowed_suffix,
75
  rescan=False,
76
  max_workers=parallelism if parallelism > 0 else 1,
 
100
  # 3. Create read tasks
101
  read_tasks = []
102
  for suffix, file_paths in files_by_suffix.items():
103
+ reader = _build_reader(suffix, working_dir, **reader_kwargs)
104
  ds = reader.read(file_paths)
105
  read_tasks.append(ds)
106