NOT-OMEGA committed on
Commit
7d3f899
·
verified ·
1 Parent(s): 8ca0e43

Update classify.py

Browse files
Files changed (1) hide show
  1. classify.py +3 -3
classify.py CHANGED
@@ -8,7 +8,7 @@ import statistics
8
  import pandas as pd
9
  import multiprocessing as mp
10
  from functools import lru_cache
11
- from concurrent.futures import ThreadPoolExecutor
12
  from processor_regex import classify_with_regex
13
  from processor_bert import classify_batch as bert_batch
14
  from processor_llm import classify_with_llm
@@ -118,10 +118,10 @@ def classify_csv(input_path: str, output_path: str = "output.csv") -> tuple[str,
118
 
119
  t_start = time.perf_counter()
120
 
121
- # FIX: Use 'spawn' context! This is the magic that prevents PyTorch/ONNX Segfaults
122
  ctx = mp.get_context('spawn')
123
 
124
- with ctx.ProcessPoolExecutor(max_workers=safe_cores) as executor:
125
  for chunk_result in executor.map(_process_chunk, chunks):
126
  results.extend(chunk_result)
127
 
 
8
  import pandas as pd
9
  import multiprocessing as mp
10
  from functools import lru_cache
11
+ from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
12
  from processor_regex import classify_with_regex
13
  from processor_bert import classify_batch as bert_batch
14
  from processor_llm import classify_with_llm
 
118
 
119
  t_start = time.perf_counter()
120
 
121
+ # FIX: Correctly pass the spawn context to ProcessPoolExecutor
122
  ctx = mp.get_context('spawn')
123
 
124
+ with ProcessPoolExecutor(max_workers=safe_cores, mp_context=ctx) as executor:
125
  for chunk_result in executor.map(_process_chunk, chunks):
126
  results.extend(chunk_result)
127