Spaces:
Running
Running
Set max line length to 120
Browse files
- pyproject.toml +2 -0
- src/embedder.py +7 -22
pyproject.toml
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[tool.black]
|
| 2 |
+
line-length = 120
|
src/embedder.py
CHANGED
|
@@ -62,28 +62,17 @@ class OpenAIBatchEmbedder(BatchEmbedder):
|
|
| 62 |
openai_batch_id = self._issue_job_for_chunks(
|
| 63 |
sub_batch, batch_id=f"{repo_name}/{len(self.openai_batch_ids)}"
|
| 64 |
)
|
| 65 |
-
self.openai_batch_ids[openai_batch_id] = self._metadata_for_chunks(
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
if (
|
| 69 |
-
max_embedding_jobs
|
| 70 |
-
and len(self.openai_batch_ids) >= max_embedding_jobs
|
| 71 |
-
):
|
| 72 |
-
logging.info(
|
| 73 |
-
"Reached the maximum number of embedding jobs. Stopping."
|
| 74 |
-
)
|
| 75 |
return
|
| 76 |
batch = []
|
| 77 |
|
| 78 |
# Finally, commit the last batch.
|
| 79 |
if batch:
|
| 80 |
-
openai_batch_id = self._issue_job_for_chunks(
|
| 81 |
-
batch, batch_id=f"{repo_name}/{len(self.openai_batch_ids)}"
|
| 82 |
-
)
|
| 83 |
self.openai_batch_ids[openai_batch_id] = self._metadata_for_chunks(batch)
|
| 84 |
-
logging.info(
|
| 85 |
-
"Issued %d jobs for %d chunks.", len(self.openai_batch_ids), chunk_count
|
| 86 |
-
)
|
| 87 |
|
| 88 |
# Save the job IDs to a file, just in case this script is terminated by mistake.
|
| 89 |
metadata_file = os.path.join(self.local_dir, "openai_batch_ids.json")
|
|
@@ -139,9 +128,7 @@ class OpenAIBatchEmbedder(BatchEmbedder):
|
|
| 139 |
OpenAIBatchEmbedder._export_to_jsonl([request], input_file)
|
| 140 |
|
| 141 |
# Upload the file and issue the embedding job.
|
| 142 |
-
batch_input_file = self.client.files.create(
|
| 143 |
-
file=open(input_file, "rb"), purpose="batch"
|
| 144 |
-
)
|
| 145 |
batch_status = self._create_batch_job(batch_input_file.id)
|
| 146 |
logging.info("Created job with ID %s", batch_status.id)
|
| 147 |
return batch_status.id
|
|
@@ -157,9 +144,7 @@ class OpenAIBatchEmbedder(BatchEmbedder):
|
|
| 157 |
metadata={},
|
| 158 |
)
|
| 159 |
except Exception as e:
|
| 160 |
-
print(
|
| 161 |
-
f"Failed to create batch job with input_file_id={input_file_id}. Error: {e}"
|
| 162 |
-
)
|
| 163 |
return None
|
| 164 |
|
| 165 |
@staticmethod
|
|
|
|
| 62 |
openai_batch_id = self._issue_job_for_chunks(
|
| 63 |
sub_batch, batch_id=f"{repo_name}/{len(self.openai_batch_ids)}"
|
| 64 |
)
|
| 65 |
+
self.openai_batch_ids[openai_batch_id] = self._metadata_for_chunks(sub_batch)
|
| 66 |
+
if max_embedding_jobs and len(self.openai_batch_ids) >= max_embedding_jobs:
|
| 67 |
+
logging.info("Reached the maximum number of embedding jobs. Stopping.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
return
|
| 69 |
batch = []
|
| 70 |
|
| 71 |
# Finally, commit the last batch.
|
| 72 |
if batch:
|
| 73 |
+
openai_batch_id = self._issue_job_for_chunks(batch, batch_id=f"{repo_name}/{len(self.openai_batch_ids)}")
|
|
|
|
|
|
|
| 74 |
self.openai_batch_ids[openai_batch_id] = self._metadata_for_chunks(batch)
|
| 75 |
+
logging.info("Issued %d jobs for %d chunks.", len(self.openai_batch_ids), chunk_count)
|
|
|
|
|
|
|
| 76 |
|
| 77 |
# Save the job IDs to a file, just in case this script is terminated by mistake.
|
| 78 |
metadata_file = os.path.join(self.local_dir, "openai_batch_ids.json")
|
|
|
|
| 128 |
OpenAIBatchEmbedder._export_to_jsonl([request], input_file)
|
| 129 |
|
| 130 |
# Upload the file and issue the embedding job.
|
| 131 |
+
batch_input_file = self.client.files.create(file=open(input_file, "rb"), purpose="batch")
|
|
|
|
|
|
|
| 132 |
batch_status = self._create_batch_job(batch_input_file.id)
|
| 133 |
logging.info("Created job with ID %s", batch_status.id)
|
| 134 |
return batch_status.id
|
|
|
|
| 144 |
metadata={},
|
| 145 |
)
|
| 146 |
except Exception as e:
|
| 147 |
+
print(f"Failed to create batch job with input_file_id={input_file_id}. Error: {e}")
|
|
|
|
|
|
|
| 148 |
return None
|
| 149 |
|
| 150 |
@staticmethod
|