juliaturc committed on
Commit
77a0875
·
1 Parent(s): eab5126

Set max line length to 120

Browse files
Files changed (2) hide show
  1. pyproject.toml +2 -0
  2. src/embedder.py +7 -22
pyproject.toml ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ [tool.black]
2
+ line-length = 120
src/embedder.py CHANGED
@@ -62,28 +62,17 @@ class OpenAIBatchEmbedder(BatchEmbedder):
62
  openai_batch_id = self._issue_job_for_chunks(
63
  sub_batch, batch_id=f"{repo_name}/{len(self.openai_batch_ids)}"
64
  )
65
- self.openai_batch_ids[openai_batch_id] = self._metadata_for_chunks(
66
- sub_batch
67
- )
68
- if (
69
- max_embedding_jobs
70
- and len(self.openai_batch_ids) >= max_embedding_jobs
71
- ):
72
- logging.info(
73
- "Reached the maximum number of embedding jobs. Stopping."
74
- )
75
  return
76
  batch = []
77
 
78
  # Finally, commit the last batch.
79
  if batch:
80
- openai_batch_id = self._issue_job_for_chunks(
81
- batch, batch_id=f"{repo_name}/{len(self.openai_batch_ids)}"
82
- )
83
  self.openai_batch_ids[openai_batch_id] = self._metadata_for_chunks(batch)
84
- logging.info(
85
- "Issued %d jobs for %d chunks.", len(self.openai_batch_ids), chunk_count
86
- )
87
 
88
  # Save the job IDs to a file, just in case this script is terminated by mistake.
89
  metadata_file = os.path.join(self.local_dir, "openai_batch_ids.json")
@@ -139,9 +128,7 @@ class OpenAIBatchEmbedder(BatchEmbedder):
139
  OpenAIBatchEmbedder._export_to_jsonl([request], input_file)
140
 
141
  # Upload the file and issue the embedding job.
142
- batch_input_file = self.client.files.create(
143
- file=open(input_file, "rb"), purpose="batch"
144
- )
145
  batch_status = self._create_batch_job(batch_input_file.id)
146
  logging.info("Created job with ID %s", batch_status.id)
147
  return batch_status.id
@@ -157,9 +144,7 @@ class OpenAIBatchEmbedder(BatchEmbedder):
157
  metadata={},
158
  )
159
  except Exception as e:
160
- print(
161
- f"Failed to create batch job with input_file_id={input_file_id}. Error: {e}"
162
- )
163
  return None
164
 
165
  @staticmethod
 
62
  openai_batch_id = self._issue_job_for_chunks(
63
  sub_batch, batch_id=f"{repo_name}/{len(self.openai_batch_ids)}"
64
  )
65
+ self.openai_batch_ids[openai_batch_id] = self._metadata_for_chunks(sub_batch)
66
+ if max_embedding_jobs and len(self.openai_batch_ids) >= max_embedding_jobs:
67
+ logging.info("Reached the maximum number of embedding jobs. Stopping.")
 
 
 
 
 
 
 
68
  return
69
  batch = []
70
 
71
  # Finally, commit the last batch.
72
  if batch:
73
+ openai_batch_id = self._issue_job_for_chunks(batch, batch_id=f"{repo_name}/{len(self.openai_batch_ids)}")
 
 
74
  self.openai_batch_ids[openai_batch_id] = self._metadata_for_chunks(batch)
75
+ logging.info("Issued %d jobs for %d chunks.", len(self.openai_batch_ids), chunk_count)
 
 
76
 
77
  # Save the job IDs to a file, just in case this script is terminated by mistake.
78
  metadata_file = os.path.join(self.local_dir, "openai_batch_ids.json")
 
128
  OpenAIBatchEmbedder._export_to_jsonl([request], input_file)
129
 
130
  # Upload the file and issue the embedding job.
131
+ batch_input_file = self.client.files.create(file=open(input_file, "rb"), purpose="batch")
 
 
132
  batch_status = self._create_batch_job(batch_input_file.id)
133
  logging.info("Created job with ID %s", batch_status.id)
134
  return batch_status.id
 
144
  metadata={},
145
  )
146
  except Exception as e:
147
+ print(f"Failed to create batch job with input_file_id={input_file_id}. Error: {e}")
 
 
148
  return None
149
 
150
  @staticmethod