Spaces:
Runtime error
Runtime error
create embeddings of ppt and store metadata of files in tables
Browse files- migrations/versions/{c649dcba2de3_create_tables.py → 3292a4761b48_files_and_vector_records.py} +24 -6
- migrations/versions/37cddaae80e7_create_tables.py +0 -30
- src/controllers/_bot_controller.py +46 -1
- src/models/__init__.py +3 -1
- src/repositories/__init__.py +3 -1
- src/repositories/_file_repository.py +32 -0
- src/repositories/_vector_record_repository.py +21 -0
- src/services/__init__.py +1 -1
- src/services/_file_service.py +59 -2
migrations/versions/{c649dcba2de3_create_tables.py → 3292a4761b48_files_and_vector_records.py}
RENAMED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
-
"""
|
| 2 |
|
| 3 |
-
Revision ID:
|
| 4 |
Revises: 8652e8501339
|
| 5 |
-
Create Date: 2024-10-
|
| 6 |
|
| 7 |
"""
|
| 8 |
from typing import Sequence, Union
|
|
@@ -12,7 +12,7 @@ import sqlalchemy as sa
|
|
| 12 |
|
| 13 |
|
| 14 |
# revision identifiers, used by Alembic.
|
| 15 |
-
revision: str = '
|
| 16 |
down_revision: Union[str, None] = '8652e8501339'
|
| 17 |
branch_labels: Union[str, Sequence[str], None] = None
|
| 18 |
depends_on: Union[str, Sequence[str], None] = None
|
|
@@ -20,11 +20,29 @@ depends_on: Union[str, Sequence[str], None] = None
|
|
| 20 |
|
| 21 |
def upgrade() -> None:
|
| 22 |
# ### commands auto generated by Alembic - please adjust! ###
|
| 23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
# ### end Alembic commands ###
|
| 25 |
|
| 26 |
|
| 27 |
def downgrade() -> None:
|
| 28 |
# ### commands auto generated by Alembic - please adjust! ###
|
| 29 |
-
|
|
|
|
| 30 |
# ### end Alembic commands ###
|
|
|
|
| 1 |
+
"""files_and_vector_records
|
| 2 |
|
| 3 |
+
Revision ID: 3292a4761b48
|
| 4 |
Revises: 8652e8501339
|
| 5 |
+
Create Date: 2024-10-28 13:03:53.442309
|
| 6 |
|
| 7 |
"""
|
| 8 |
from typing import Sequence, Union
|
|
|
|
| 12 |
|
| 13 |
|
| 14 |
# revision identifiers, used by Alembic.
|
| 15 |
+
revision: str = '3292a4761b48'
|
| 16 |
down_revision: Union[str, None] = '8652e8501339'
|
| 17 |
branch_labels: Union[str, Sequence[str], None] = None
|
| 18 |
depends_on: Union[str, Sequence[str], None] = None
|
|
|
|
| 20 |
|
| 21 |
def upgrade() -> None:
|
| 22 |
# ### commands auto generated by Alembic - please adjust! ###
|
| 23 |
+
op.create_table('files',
|
| 24 |
+
sa.Column('id', sa.Integer(), autoincrement=True, nullable=False),
|
| 25 |
+
sa.Column('file_name', sa.String(), nullable=False),
|
| 26 |
+
sa.Column('uploaded_at', sa.DateTime(), nullable=False),
|
| 27 |
+
sa.Column('processed_at', sa.DateTime(), nullable=True),
|
| 28 |
+
sa.Column('slack_file_id', sa.String(), nullable=True),
|
| 29 |
+
sa.Column('user_id', sa.Integer(), nullable=False),
|
| 30 |
+
sa.Column('mime_type', sa.String(), nullable=False),
|
| 31 |
+
sa.ForeignKeyConstraint(['user_id'], ['users.id'], ),
|
| 32 |
+
sa.PrimaryKeyConstraint('id')
|
| 33 |
+
)
|
| 34 |
+
op.create_table('vector_records',
|
| 35 |
+
sa.Column('id', sa.Integer(), autoincrement=True, nullable=False),
|
| 36 |
+
sa.Column('file_id', sa.Integer(), nullable=False),
|
| 37 |
+
sa.Column('record_id', sa.Integer(), nullable=False),
|
| 38 |
+
sa.ForeignKeyConstraint(['file_id'], ['files.id'], ),
|
| 39 |
+
sa.PrimaryKeyConstraint('id')
|
| 40 |
+
)
|
| 41 |
# ### end Alembic commands ###
|
| 42 |
|
| 43 |
|
| 44 |
def downgrade() -> None:
|
| 45 |
# ### commands auto generated by Alembic - please adjust! ###
|
| 46 |
+
op.drop_table('vector_records')
|
| 47 |
+
op.drop_table('files')
|
| 48 |
# ### end Alembic commands ###
|
migrations/versions/37cddaae80e7_create_tables.py
DELETED
|
@@ -1,30 +0,0 @@
|
|
| 1 |
-
"""create_tables
|
| 2 |
-
|
| 3 |
-
Revision ID: 37cddaae80e7
|
| 4 |
-
Revises: c649dcba2de3
|
| 5 |
-
Create Date: 2024-10-28 11:29:18.878826
|
| 6 |
-
|
| 7 |
-
"""
|
| 8 |
-
from typing import Sequence, Union
|
| 9 |
-
|
| 10 |
-
from alembic import op
|
| 11 |
-
import sqlalchemy as sa
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
# revision identifiers, used by Alembic.
|
| 15 |
-
revision: str = '37cddaae80e7'
|
| 16 |
-
down_revision: Union[str, None] = 'c649dcba2de3'
|
| 17 |
-
branch_labels: Union[str, Sequence[str], None] = None
|
| 18 |
-
depends_on: Union[str, Sequence[str], None] = None
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
def upgrade() -> None:
|
| 22 |
-
# ### commands auto generated by Alembic - please adjust! ###
|
| 23 |
-
pass
|
| 24 |
-
# ### end Alembic commands ###
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
def downgrade() -> None:
|
| 28 |
-
# ### commands auto generated by Alembic - please adjust! ###
|
| 29 |
-
pass
|
| 30 |
-
# ### end Alembic commands ###
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/controllers/_bot_controller.py
CHANGED
|
@@ -7,7 +7,7 @@ from src.utils import SlackClient
|
|
| 7 |
from src.services import SlackBotService, SlackTeamService, UserService
|
| 8 |
from src.models import ContentDeliveryFrequency
|
| 9 |
from src.utils import logger
|
| 10 |
-
|
| 11 |
|
| 12 |
class BotController:
|
| 13 |
def __init__(self):
|
|
@@ -23,23 +23,68 @@ class BotController:
|
|
| 23 |
try:
|
| 24 |
if "challenge" in event:
|
| 25 |
return {"challenge": event["challenge"]}
|
|
|
|
| 26 |
if "event" in event:
|
| 27 |
actual_event = event["event"]
|
| 28 |
logger.info(f"Actual event: {actual_event}")
|
|
|
|
| 29 |
if "file" in actual_event:
|
| 30 |
file = actual_event["file"]
|
| 31 |
file_id = file["id"]
|
| 32 |
logger.info(f"file id is {file_id}")
|
|
|
|
| 33 |
async with SlackClient() as slack_client:
|
|
|
|
| 34 |
file_info = await slack_client.get_file_info(file_id)
|
| 35 |
logger.info(f"File info: {file_info}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
users = await slack_client.get_users_list()
|
| 37 |
logger.info(f"Users: {users}")
|
| 38 |
else:
|
| 39 |
logger.info("file not shared")
|
| 40 |
else:
|
| 41 |
logger.info("event not found")
|
|
|
|
| 42 |
return {"message": "Event received successfully"}
|
|
|
|
| 43 |
except HTTPException as e:
|
| 44 |
logger.warning(e)
|
| 45 |
raise e
|
|
|
|
| 7 |
from src.services import SlackBotService, SlackTeamService, UserService
|
| 8 |
from src.models import ContentDeliveryFrequency
|
| 9 |
from src.utils import logger
|
| 10 |
+
from src.services import FileService
|
| 11 |
|
| 12 |
class BotController:
|
| 13 |
def __init__(self):
|
|
|
|
| 23 |
try:
|
| 24 |
if "challenge" in event:
|
| 25 |
return {"challenge": event["challenge"]}
|
| 26 |
+
|
| 27 |
if "event" in event:
|
| 28 |
actual_event = event["event"]
|
| 29 |
logger.info(f"Actual event: {actual_event}")
|
| 30 |
+
|
| 31 |
if "file" in actual_event:
|
| 32 |
file = actual_event["file"]
|
| 33 |
file_id = file["id"]
|
| 34 |
logger.info(f"file id is {file_id}")
|
| 35 |
+
|
| 36 |
async with SlackClient() as slack_client:
|
| 37 |
+
# Get file info
|
| 38 |
file_info = await slack_client.get_file_info(file_id)
|
| 39 |
logger.info(f"File info: {file_info}")
|
| 40 |
+
|
| 41 |
+
# Get file URL
|
| 42 |
+
file_url = file_info.get("url_private")
|
| 43 |
+
file_name = file_info.get("name", "downloaded_file")
|
| 44 |
+
|
| 45 |
+
if file_url:
|
| 46 |
+
# Download the file content
|
| 47 |
+
headers = {"Authorization": f"Bearer {slack_client.token}"}
|
| 48 |
+
async with aiohttp.ClientSession() as session:
|
| 49 |
+
async with session.get(file_url, headers=headers) as response:
|
| 50 |
+
if response.status == 200:
|
| 51 |
+
content = await response.read()
|
| 52 |
+
# Create UploadFile object from the content
|
| 53 |
+
upload_file = UploadFile(
|
| 54 |
+
filename=file_name,
|
| 55 |
+
file=BytesIO(content)
|
| 56 |
+
)
|
| 57 |
+
|
| 58 |
+
# Process the file
|
| 59 |
+
async with self.user_repository as user_repo:
|
| 60 |
+
user = await user_repo.get_user_by_slack_id(user_slack_id)
|
| 61 |
+
if not user:
|
| 62 |
+
logger.error(f"User not found for slack_id: {user_slack_id}")
|
| 63 |
+
return {"message": "User not found"}
|
| 64 |
+
|
| 65 |
+
async with self.file_service as service:
|
| 66 |
+
result = await service.parse_document(
|
| 67 |
+
file=upload_file,
|
| 68 |
+
user_id=user.id,
|
| 69 |
+
slack_file_id=slack_file_id
|
| 70 |
+
)
|
| 71 |
+
logger.info(f"File processing result: {result}")
|
| 72 |
+
else:
|
| 73 |
+
logger.error(f"Failed to download file. Status: {response.status}")
|
| 74 |
+
raise HTTPException(
|
| 75 |
+
status_code=response.status,
|
| 76 |
+
detail="Failed to download file from Slack"
|
| 77 |
+
)
|
| 78 |
+
|
| 79 |
users = await slack_client.get_users_list()
|
| 80 |
logger.info(f"Users: {users}")
|
| 81 |
else:
|
| 82 |
logger.info("file not shared")
|
| 83 |
else:
|
| 84 |
logger.info("event not found")
|
| 85 |
+
|
| 86 |
return {"message": "Event received successfully"}
|
| 87 |
+
|
| 88 |
except HTTPException as e:
|
| 89 |
logger.warning(e)
|
| 90 |
raise e
|
src/models/__init__.py
CHANGED
|
@@ -1,7 +1,9 @@
|
|
| 1 |
from ._base import Base
|
| 2 |
from ._user import User, ContentDeliveryFrequency
|
| 3 |
from ._slack_team import SlackTeam
|
|
|
|
|
|
|
| 4 |
|
| 5 |
-
__all__ = ["Base", "User", "ContentDeliveryFrequency", "SlackTeam"]
|
| 6 |
__version__ = "0.1.0"
|
| 7 |
__author__ = "Kanha Upadhyay"
|
|
|
|
| 1 |
from ._base import Base
|
| 2 |
from ._user import User, ContentDeliveryFrequency
|
| 3 |
from ._slack_team import SlackTeam
|
| 4 |
+
from ._files import File
|
| 5 |
+
from ._vector_records import VectorRecord
|
| 6 |
|
| 7 |
+
__all__ = ["Base", "User", "ContentDeliveryFrequency", "SlackTeam", "File", "VectorRecord"]
|
| 8 |
__version__ = "0.1.0"
|
| 9 |
__author__ = "Kanha Upadhyay"
|
src/repositories/__init__.py
CHANGED
|
@@ -2,7 +2,9 @@ from ._base_repository import BaseRepository
|
|
| 2 |
from ._config import DatabaseConfig
|
| 3 |
from ._user_repository import UserRepository
|
| 4 |
from ._slack_team_repository import SlackTeamRepository
|
|
|
|
|
|
|
| 5 |
|
| 6 |
-
__all__ = ["DatabaseConfig", "BaseRepository", "UserRepository", "SlackTeamRepository"]
|
| 7 |
__version__ = "0.1.0"
|
| 8 |
__author__ = "Kanha Upadhyay"
|
|
|
|
| 2 |
from ._config import DatabaseConfig
|
| 3 |
from ._user_repository import UserRepository
|
| 4 |
from ._slack_team_repository import SlackTeamRepository
|
| 5 |
+
from ._file_repository import FileRepository
|
| 6 |
+
from ._vector_record_repository import VectorRecordRepository
|
| 7 |
|
| 8 |
+
__all__ = ["DatabaseConfig", "BaseRepository", "UserRepository", "SlackTeamRepository", "FileRepository", "VectorRecordRepository"]
|
| 9 |
__version__ = "0.1.0"
|
| 10 |
__author__ = "Kanha Upadhyay"
|
src/repositories/_file_repository.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from sqlalchemy import select
|
| 2 |
+
from datetime import datetime
|
| 3 |
+
from src.models import File
|
| 4 |
+
from ._base_repository import BaseRepository
|
| 5 |
+
|
| 6 |
+
class FileRepository(BaseRepository):
|
| 7 |
+
def __init__(self):
|
| 8 |
+
super().__init__(model=File)
|
| 9 |
+
|
| 10 |
+
async def create_file(self, file_name: str, user_id: int, mime_type: str, slack_file_id: str = None) -> File:
|
| 11 |
+
"""Create a new file record"""
|
| 12 |
+
file = File(
|
| 13 |
+
file_name=file_name,
|
| 14 |
+
user_id=user_id,
|
| 15 |
+
mime_type=mime_type,
|
| 16 |
+
slack_file_id=slack_file_id,
|
| 17 |
+
uploaded_at=datetime.utcnow()
|
| 18 |
+
)
|
| 19 |
+
return await self.add(file)
|
| 20 |
+
|
| 21 |
+
async def mark_as_processed(self, file_id: int) -> None:
|
| 22 |
+
"""Mark file as processed"""
|
| 23 |
+
file = await self.get_by_id(file_id)
|
| 24 |
+
if file:
|
| 25 |
+
file.processed_at = datetime.utcnow()
|
| 26 |
+
await self.update(file)
|
| 27 |
+
|
| 28 |
+
async def get_by_slack_file_id(self, slack_file_id: str) -> File:
|
| 29 |
+
"""Get file by Slack file ID"""
|
| 30 |
+
query = select(File).where(File.slack_file_id == slack_file_id)
|
| 31 |
+
result = await self.execute(query)
|
| 32 |
+
return result.scalar_one_or_none()
|
src/repositories/_vector_record_repository.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from src.models import VectorRecord
|
| 2 |
+
from ._base_repository import BaseRepository
|
| 3 |
+
from typing import List
|
| 4 |
+
|
| 5 |
+
class VectorRecordRepository(BaseRepository):
|
| 6 |
+
def __init__(self):
|
| 7 |
+
super().__init__(model=VectorRecord)
|
| 8 |
+
|
| 9 |
+
async def create_records(self, file_id: int, record_ids: List[int]) -> List[VectorRecord]:
|
| 10 |
+
"""Create multiple vector records for a file"""
|
| 11 |
+
records = [
|
| 12 |
+
VectorRecord(file_id=file_id, record_id=record_id)
|
| 13 |
+
for record_id in record_ids
|
| 14 |
+
]
|
| 15 |
+
return await self.add_all(records)
|
| 16 |
+
|
| 17 |
+
async def get_by_file_id(self, file_id: int) -> List[VectorRecord]:
|
| 18 |
+
"""Get all vector records for a file"""
|
| 19 |
+
query = select(VectorRecord).where(VectorRecord.file_id == file_id)
|
| 20 |
+
result = await self.execute(query)
|
| 21 |
+
return result.scalars().all()
|
src/services/__init__.py
CHANGED
|
@@ -4,6 +4,6 @@ from ._slack_bot_service import SlackBotService
|
|
| 4 |
from ._slack_team_service import SlackTeamService
|
| 5 |
from ._file_service import FileService
|
| 6 |
|
| 7 |
-
__all__ = ["ContentDeliveryService", "UserService", "SlackBotService", "SlackTeamService","FileService"]
|
| 8 |
__version__ = "0.1.0"
|
| 9 |
__author__ = "Ashok Bhati"
|
|
|
|
| 4 |
from ._slack_team_service import SlackTeamService
|
| 5 |
from ._file_service import FileService
|
| 6 |
|
| 7 |
+
__all__ = ["ContentDeliveryService", "UserService", "SlackBotService", "SlackTeamService", "FileService"]
|
| 8 |
__version__ = "0.1.0"
|
| 9 |
__author__ = "Ashok Bhati"
|
src/services/_file_service.py
CHANGED
|
@@ -36,18 +36,75 @@ class FileService:
|
|
| 36 |
shutil.copyfileobj(file.file, buffer)
|
| 37 |
return file_path
|
| 38 |
|
| 39 |
-
async def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
try:
|
| 41 |
file_path = await self.save_uploaded_file(file)
|
| 42 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
parsed_content = await parse_file_content(str(file_path))
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
documents = create_documents(parsed_content, file.filename)
|
| 46 |
|
| 47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
return {
|
| 50 |
"message": "Document's embeddings created successfully",
|
|
|
|
| 51 |
}
|
| 52 |
except Exception as e:
|
| 53 |
logger.error(f"Error in parse_document: {str(e)}")
|
|
|
|
| 36 |
shutil.copyfileobj(file.file, buffer)
|
| 37 |
return file_path
|
| 38 |
|
| 39 |
+
async def filter_ppt_content(self, parsed_content: Tuple[Tuple[int, str], ...]) -> Tuple[Tuple[int, str], ...]:
|
| 40 |
+
"""
|
| 41 |
+
Filter PowerPoint content based on specific target phrases.
|
| 42 |
+
|
| 43 |
+
Args:
|
| 44 |
+
parsed_content: Tuple of (page_number, content) pairs
|
| 45 |
+
|
| 46 |
+
Returns:
|
| 47 |
+
Filtered tuple of (page_number, content) pairs
|
| 48 |
+
"""
|
| 49 |
+
target_phrases = [
|
| 50 |
+
"My Strategy for Managing Myself",
|
| 51 |
+
"My Strategy For Active Listening",
|
| 52 |
+
"My Strategy for Managing Others",
|
| 53 |
+
"What It Concretely Looks Like",
|
| 54 |
+
"My People",
|
| 55 |
+
"My Philosophy",
|
| 56 |
+
"My Preferences",
|
| 57 |
+
"My Priorities for Q1 2025"
|
| 58 |
+
]
|
| 59 |
+
|
| 60 |
+
filtered_content = []
|
| 61 |
+
|
| 62 |
+
for page_num, content in parsed_content:
|
| 63 |
+
content_lower = content.lower()
|
| 64 |
+
for phrase in target_phrases:
|
| 65 |
+
if phrase.lower() in content_lower:
|
| 66 |
+
filtered_content.append((page_num, content))
|
| 67 |
+
break # Move to next page once we find a matching phrase
|
| 68 |
+
|
| 69 |
+
logger.info(f"Total pages before filtering: {len(parsed_content)}")
|
| 70 |
+
logger.info(f"Total pages after filtering: {len(filtered_content)}")
|
| 71 |
+
|
| 72 |
+
return tuple(filtered_content)
|
| 73 |
+
|
| 74 |
+
async def parse_document(self, file: UploadFile, user_id: int, slack_file_id: str = None) -> Dict:
|
| 75 |
try:
|
| 76 |
file_path = await self.save_uploaded_file(file)
|
| 77 |
|
| 78 |
+
# Create file record in database
|
| 79 |
+
mime_type = file.content_type or f"application/{file.filename.split('.')[-1]}"
|
| 80 |
+
db_file = await self.file_repository.create_file(
|
| 81 |
+
file_name=file.filename,
|
| 82 |
+
user_id=user_id,
|
| 83 |
+
mime_type=mime_type,
|
| 84 |
+
slack_file_id=slack_file_id
|
| 85 |
+
)
|
| 86 |
+
|
| 87 |
parsed_content = await parse_file_content(str(file_path))
|
| 88 |
+
|
| 89 |
+
file_extension = file.filename.lower().split('.')[-1]
|
| 90 |
+
if file_extension in ['ppt', 'pptx']:
|
| 91 |
+
parsed_content = await self.filter_ppt_content(parsed_content)
|
| 92 |
|
| 93 |
documents = create_documents(parsed_content, file.filename)
|
| 94 |
|
| 95 |
+
# Store documents in vector store and get record IDs
|
| 96 |
+
record_ids = await store_documents(documents)
|
| 97 |
+
|
| 98 |
+
# Store vector record mappings in database
|
| 99 |
+
if record_ids:
|
| 100 |
+
await self.vector_repository.create_records(db_file.id, record_ids)
|
| 101 |
+
|
| 102 |
+
# Mark file as processed
|
| 103 |
+
await self.file_repository.mark_as_processed(db_file.id)
|
| 104 |
|
| 105 |
return {
|
| 106 |
"message": "Document's embeddings created successfully",
|
| 107 |
+
"file_id": db_file.id
|
| 108 |
}
|
| 109 |
except Exception as e:
|
| 110 |
logger.error(f"Error in parse_document: {str(e)}")
|