Spaces:
Sleeping
Sleeping
amaye15
committed on
Commit
·
e0b1978
1
Parent(s):
bc82930
Feat - Progress updated
Browse files
src/api/services/embedding_service.py
CHANGED
|
@@ -87,6 +87,8 @@ class EmbeddingService:
|
|
| 87 |
self.model = model
|
| 88 |
self.batch_size = batch_size
|
| 89 |
self.semaphore = asyncio.Semaphore(max_concurrent_requests) # Rate limiter
|
|
|
|
|
|
|
| 90 |
|
| 91 |
async def get_embedding(self, text: str) -> List[float]:
|
| 92 |
"""Generate embeddings for the given text using OpenAI."""
|
|
@@ -96,6 +98,8 @@ class EmbeddingService:
|
|
| 96 |
response = await self.client.embeddings.create(
|
| 97 |
input=[text], model=self.model
|
| 98 |
)
|
|
|
|
|
|
|
| 99 |
return response.data[0].embedding
|
| 100 |
except Exception as e:
|
| 101 |
logger.error(f"Failed to generate embedding: {e}")
|
|
@@ -106,6 +110,9 @@ class EmbeddingService:
|
|
| 106 |
) -> pd.DataFrame:
|
| 107 |
"""Create embeddings for the target column in the dataset."""
|
| 108 |
logger.info("Generating embeddings...")
|
|
|
|
|
|
|
|
|
|
| 109 |
batches = [
|
| 110 |
df[i : i + self.batch_size] for i in range(0, len(df), self.batch_size)
|
| 111 |
]
|
|
@@ -126,3 +133,10 @@ class EmbeddingService:
|
|
| 126 |
)
|
| 127 |
df_batch[output_column] = embeddings
|
| 128 |
return df_batch
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
self.model = model
|
| 88 |
self.batch_size = batch_size
|
| 89 |
self.semaphore = asyncio.Semaphore(max_concurrent_requests) # Rate limiter
|
| 90 |
+
self.total_requests = 0 # Total number of requests to process
|
| 91 |
+
self.completed_requests = 0 # Number of completed requests
|
| 92 |
|
| 93 |
async def get_embedding(self, text: str) -> List[float]:
|
| 94 |
"""Generate embeddings for the given text using OpenAI."""
|
|
|
|
| 98 |
response = await self.client.embeddings.create(
|
| 99 |
input=[text], model=self.model
|
| 100 |
)
|
| 101 |
+
self.completed_requests += 1 # Increment completed requests
|
| 102 |
+
self._log_progress() # Log progress
|
| 103 |
return response.data[0].embedding
|
| 104 |
except Exception as e:
|
| 105 |
logger.error(f"Failed to generate embedding: {e}")
|
|
|
|
| 110 |
) -> pd.DataFrame:
|
| 111 |
"""Create embeddings for the target column in the dataset."""
|
| 112 |
logger.info("Generating embeddings...")
|
| 113 |
+
self.total_requests = len(df) # Set total number of requests
|
| 114 |
+
self.completed_requests = 0 # Reset completed requests counter
|
| 115 |
+
|
| 116 |
batches = [
|
| 117 |
df[i : i + self.batch_size] for i in range(0, len(df), self.batch_size)
|
| 118 |
]
|
|
|
|
| 133 |
)
|
| 134 |
df_batch[output_column] = embeddings
|
| 135 |
return df_batch
|
| 136 |
+
|
| 137 |
+
def _log_progress(self):
    """Log the progress of embedding generation.

    Reads ``self.completed_requests`` and ``self.total_requests`` and emits
    a single INFO-level message. When a total is known, the message includes
    the completed/total counts and the percentage done; when the total is
    zero, only the completed count is reported.
    """
    total = self.total_requests
    if total <= 0:
        # total_requests is initialised to 0 and only set when embeddings
        # are generated for a dataset, while get_embedding() logs progress
        # after every successful request. Without this guard the division
        # below raises ZeroDivisionError for a direct get_embedding() call.
        logger.info(
            f"Progress: {self.completed_requests} completed (total unknown)"
        )
        return

    progress = (self.completed_requests / total) * 100
    logger.info(
        f"Progress: {self.completed_requests}/{self.total_requests} ({progress:.2f}%)"
    )
|