File size: 1,979 Bytes
7576570
 
 
 
 
 
 
57d3340
 
7576570
 
 
 
3d4aba5
7576570
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
from sqlalchemy import create_engine, Column, Integer, String, JSON, DateTime,ARRAY, Float
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker, Session
from typing import Generator
from contextlib import contextmanager
from datetime import datetime
import pytz
import os




# Database connection URL, read from the environment at import time.
# Importing this module raises KeyError if SQLALCHEMY_DATABASE_URL is unset.
SQLALCHEMY_DATABASE_URL = os.environ['SQLALCHEMY_DATABASE_URL']

# Connection-pool settings forwarded to create_engine().
_POOL_OPTIONS = {
    "pool_size": 5,        # connections kept open in the pool
    "max_overflow": 10,    # extra connections allowed beyond pool_size
    "pool_timeout": 30,    # seconds to wait for a free connection
    "pool_recycle": 1800,  # seconds after which a connection is recycled
}
engine = create_engine(SQLALCHEMY_DATABASE_URL, **_POOL_OPTIONS)

# Session factory bound to the engine; sessions neither autoflush nor autocommit.
SessionLocal = sessionmaker(bind=engine, autoflush=False, autocommit=False)

# Declarative base class shared by the ORM models in this module.
Base = declarative_base()

class ExtractedFile(Base):
    """ORM model for a row of the ``extracted_files`` table.

    Each row holds a file name, a JSON payload for that file, the URL of the
    repository it belongs to, and a creation timestamp.
    """
    __tablename__ = "extracted_files"

    id = Column(Integer, primary_key=True)
    file_name = Column(String(255), nullable=False)
    file_data = Column(JSON, nullable=False)
    repository_url = Column(String, nullable=False)
    # Python-side default: a timezone-aware UTC timestamp is computed when a
    # row is inserted without an explicit ``created_at`` value.
    created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(pytz.UTC))

    def __repr__(self):
        return f"<ExtractedFile(id={self.id}, file_name={self.file_name}, repository_url={self.repository_url})>"

    @classmethod
    def get_by_extension(cls, db: Session, extension: str):
        """Return all files whose name ends with ``extension``.

        Args:
            db: Session used to run the query.
            extension: Literal suffix to match against ``file_name``,
                e.g. ``".py"``.

        Returns:
            A list of matching ``ExtractedFile`` rows (empty if none match).
        """
        # endswith() renders a SQL LIKE; autoescape=True makes any "%" or "_"
        # in ``extension`` match literally instead of acting as a wildcard.
        return db.query(cls).filter(
            cls.file_name.endswith(extension, autoescape=True)
        ).all()
    
class DocumentEmbedding(Base):
    """Model for storing document embeddings.

    Maps to the ``document_embeddings`` table. Apart from the primary key,
    no column declares ``nullable``.
    """
    __tablename__ = "document_embeddings"

    # Integer primary key; additionally declared with index=True.
    id = Column(Integer, primary_key=True, index=True)
    # Indexed String column declared without a length.
    # NOTE(review): presumably the document text the embedding was computed
    # from; an index over long text can hit backend size limits - confirm.
    content = Column(String, index=True)
    # Array of floats.
    # NOTE(review): SQLAlchemy documents the generic ARRAY type as supported
    # on PostgreSQL only - confirm the target database.
    embedding = Column(ARRAY(Float))
    # JSON column.
    # NOTE(review): presumably named doc_metadata because ``metadata`` is a
    # reserved attribute on declarative classes - confirm.
    doc_metadata = Column(JSON)
    # Indexed String column.
    # NOTE(review): presumably identifies where the document came from - confirm.
    source = Column(String, index=True)

@contextmanager
def get_db() -> Generator[Session, None, None]:
    """Yield a new session from ``SessionLocal`` and close it on exit.

    Intended for use in a ``with`` statement. Nothing is committed here;
    the caller commits explicitly if it wants changes persisted.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        # Runs whether the with-body finished normally or raised.
        session.close()