Spaces:

Vanshcc
/

DB_Chatbot

Sleeping

App Files Community

Vanshcc committed 12 days ago

Commit

a8441ef

verified ·

1 Parent(s): a00009c

Upload 15 files

Browse files

Files changed (13) hide show

app.py +19 -42
chatbot.py +29 -5
config.py +7 -29
database/__pycache__/__init__.cpython-311.pyc +0 -0
database/__pycache__/connection.cpython-311.pyc +0 -0
database/__pycache__/schema_introspector.cpython-311.pyc +0 -0
database/connection.py +1 -20
database/schema_introspector.py +15 -73
sql/__pycache__/__init__.cpython-311.pyc +0 -0
sql/__pycache__/generator.cpython-311.pyc +0 -0
sql/__pycache__/validator.cpython-311.pyc +0 -0
sql/generator.py +1 -2
sql/validator.py +11 -6

app.py CHANGED Viewed

@@ -47,28 +47,21 @@ GROQ_MODELS = [
 # Database types
 DB_TYPES = {
     "MySQL": "mysql",
-    "PostgreSQL": "postgresql",
-    "SQLite": "sqlite"
 }
 def create_custom_db_config(db_type: str, **kwargs) -> DatabaseConfig:
     """Create a custom database configuration from user input."""
-    db_config = DatabaseConfig.__new__(DatabaseConfig)
-    # Set database type
-    db_config.db_type = DatabaseType(db_type)
-    # Set connection parameters
-    db_config.host = kwargs.get("host", "")
-    db_config.port = kwargs.get("port", 3306 if db_type == "mysql" else 5432)
-    db_config.database = kwargs.get("database", "")
-    db_config.username = kwargs.get("username", "")
-    db_config.password = kwargs.get("password", "")
-    db_config.ssl_ca = kwargs.get("ssl_ca", None)
-    db_config.sqlite_path = kwargs.get("sqlite_path", "./chatbot.db")
-    return db_config
 def create_custom_memory(session_id: str, user_id: str, db_connection, llm_client=None,
@@ -143,10 +136,7 @@ def render_database_config():
         # Show current environment config
         current_db_type = config.database.db_type.value.upper()
         st.info(f"📌 Using {current_db_type} from environment")
-        if config.database.is_sqlite:
-            st.caption(f"Path: {config.database.sqlite_path}")
-        else:
-            st.caption(f"Host: {config.database.host}")
         return None
     else:
@@ -162,21 +152,7 @@ def render_database_config():
         )
         db_type = DB_TYPES[db_type_label]
-        if db_type == "sqlite":
-            # SQLite only needs file path
-            sqlite_path = st.text_input(
-                "Database File Path",
-                value="./chatbot.db",
-                key="sqlite_path_input",
-                help="Path to SQLite database file (will be created if doesn't exist)"
-            )
-            return {
-                "db_type": db_type,
-                "sqlite_path": sqlite_path
-            }
-        else:
             # MySQL or PostgreSQL
             col1, col2 = st.columns([3, 1])
             with col1:
@@ -411,11 +387,7 @@ def initialize_chatbot(custom_db_params=None, api_key=None, model=None) -> bool:
             # Validate custom params
             db_type = custom_db_params.get("db_type", "mysql")
-            if db_type == "sqlite":
-                if not custom_db_params.get("sqlite_path"):
-                    st.error("Please provide SQLite database path.")
-                    return False
-            else:
                 if not all([custom_db_params.get("host"),
                            custom_db_params.get("database"),
                            custom_db_params.get("username")]):
@@ -482,6 +454,11 @@ def initialize_chatbot(custom_db_params=None, api_key=None, model=None) -> bool:
         st.session_state.llm = llm
         st.session_state.initialized = True
         # Create memory with appropriate connection
         db_conn = st.session_state.custom_db_connection or get_db()
@@ -553,7 +530,7 @@ def render_schema_explorer():
 def render_chat_interface():
     """Render the main chat interface."""
     st.title("🤖 OnceDataBot")
-    st.caption("Schema-agnostic chatbot • MySQL | PostgreSQL | SQLite • Powered by Groq (FREE!)")
     # Schema explorer
     render_schema_explorer()

 # Database types
 DB_TYPES = {
     "MySQL": "mysql",
+    "PostgreSQL": "postgresql"
 }
 def create_custom_db_config(db_type: str, **kwargs) -> DatabaseConfig:
     """Create a custom database configuration from user input."""
+    return DatabaseConfig(
+        db_type=DatabaseType(db_type),
+        host=kwargs.get("host", ""),
+        port=kwargs.get("port", 3306 if db_type == "mysql" else 5432),
+        database=kwargs.get("database", ""),
+        username=kwargs.get("username", ""),
+        password=kwargs.get("password", ""),
+        ssl_ca=kwargs.get("ssl_ca", None)
+    )
 def create_custom_memory(session_id: str, user_id: str, db_connection, llm_client=None,
         # Show current environment config
         current_db_type = config.database.db_type.value.upper()
         st.info(f"📌 Using {current_db_type} from environment")
+        st.caption(f"Host: {config.database.host}")
         return None
     else:
         )
         db_type = DB_TYPES[db_type_label]
+        if True:  # MySQL or PostgreSQL (SQLite removed)
             # MySQL or PostgreSQL
             col1, col2 = st.columns([3, 1])
             with col1:
             # Validate custom params
             db_type = custom_db_params.get("db_type", "mysql")
+            if True:
                 if not all([custom_db_params.get("host"),
                            custom_db_params.get("database"),
                            custom_db_params.get("username")]):
         st.session_state.llm = llm
         st.session_state.initialized = True
+        st.session_state.indexed = False  # Reset index status on new connection
+        # Clear RAG index to ensure no data from previous DB connection persists
+        if hasattr(chatbot, 'rag_engine') and hasattr(chatbot.rag_engine, 'clear_index'):
+            chatbot.rag_engine.clear_index()
         # Create memory with appropriate connection
         db_conn = st.session_state.custom_db_connection or get_db()
 def render_chat_interface():
     """Render the main chat interface."""
     st.title("🤖 OnceDataBot")
+    st.caption("Schema-agnostic chatbot • MySQL | PostgreSQL • Powered by Groq (FREE!)")
     # Schema explorer
     render_schema_explorer()

chatbot.py CHANGED Viewed

@@ -106,7 +106,8 @@ YOUR RESPONSE:"""
         if not self._schema_initialized:
             raise RuntimeError("Chatbot not initialized. Call initialize() first.")
-        schema = get_schema()
         total_docs = 0
         for table_name, table_info in schema.tables.items():
@@ -117,17 +118,39 @@ YOUR RESPONSE:"""
             pk = table_info.primary_keys[0] if table_info.primary_keys else None
             cols_to_select = text_cols + ([pk] if pk else [])
-            query = f"SELECT {', '.join(cols_to_select)} FROM {table_name} LIMIT 1000"
             try:
                 rows = self.db.execute_query(query)
                 docs = self.rag_engine.index_table(table_name, rows, text_cols, pk)
                 total_docs += docs
                 if progress_callback:
                     progress_callback(table_name, docs)
             except Exception as e:
                 logger.warning(f"Failed to index {table_name}: {e}")
         self.rag_engine.save()
@@ -146,7 +169,8 @@ YOUR RESPONSE:"""
                               error="Configure LLM client first")
         try:
-            schema = get_schema()
             schema_context = schema.to_context_string()
             # Check for memory commands
@@ -384,7 +408,7 @@ YOUR RESPONSE:"""
         """Get a summary of the database schema."""
         if not self._schema_initialized:
             return "Schema not loaded."
-        return get_schema().to_context_string()
 def create_chatbot(llm_client: Optional[LLMClient] = None) -> DatabaseChatbot:

         if not self._schema_initialized:
             raise RuntimeError("Chatbot not initialized. Call initialize() first.")
+        # Use the instance's introspector which might be patched for custom DB
+        schema = self.introspector.introspect()
         total_docs = 0
         for table_name, table_info in schema.tables.items():
             pk = table_info.primary_keys[0] if table_info.primary_keys else None
             cols_to_select = text_cols + ([pk] if pk else [])
+            # Quote table name based on DB specific rules to handle case sensitivity and special chars
+            if self.db.db_type.value == "mysql":
+                quoted_table = f"`{table_name}`"
+            else:
+                quoted_table = f'"{table_name}"'
+            query = f"SELECT {', '.join(cols_to_select)} FROM {quoted_table} LIMIT 1000"
             try:
+                # Try the primary query
+                query = f"SELECT {', '.join(cols_to_select)} FROM {quoted_table} LIMIT 1000"
                 rows = self.db.execute_query(query)
                 docs = self.rag_engine.index_table(table_name, rows, text_cols, pk)
                 total_docs += docs
                 if progress_callback:
                     progress_callback(table_name, docs)
             except Exception as e:
+                # Fallback mechanism for PostgreSQL if table not found (often due to schema issues)
+                if self.db.db_type.value == "postgresql" and "UndefinedTable" in str(e):
+                    try:
+                        logger.warning(f"Initial query failed for {table_name}, trying 'public' schema prefix...")
+                        fallback_query = f"SELECT {', '.join(cols_to_select)} FROM public.\"{table_name}\" LIMIT 1000"
+                        rows = self.db.execute_query(fallback_query)
+                        docs = self.rag_engine.index_table(table_name, rows, text_cols, pk)
+                        total_docs += docs
+                        if progress_callback:
+                            progress_callback(table_name, docs)
+                        continue # Success with fallback
+                    except Exception as e2:
+                        logger.error(f"Fallback query also failed for {table_name}: {e2}")
                 logger.warning(f"Failed to index {table_name}: {e}")
         self.rag_engine.save()
                               error="Configure LLM client first")
         try:
+            # Use instance introspector
+            schema = self.introspector.introspect()
             schema_context = schema.to_context_string()
             # Check for memory commands
         """Get a summary of the database schema."""
         if not self._schema_initialized:
             return "Schema not loaded."
+        return self.introspector.introspect().to_context_string()
 def create_chatbot(llm_client: Optional[LLMClient] = None) -> DatabaseChatbot:

config.py CHANGED Viewed

@@ -24,7 +24,6 @@ class DatabaseType(Enum):
     """Supported database types."""
     MYSQL = "mysql"
     POSTGRESQL = "postgresql"
-    SQLITE = "sqlite"
 class LLMProvider(Enum):
@@ -43,11 +42,11 @@ class EmbeddingProvider(Enum):
 @dataclass
 class DatabaseConfig:
     """
-    Database configuration supporting MySQL, PostgreSQL, and SQLite.
     All sensitive values are loaded from environment variables.
     """
-    # Database type (mysql, postgresql, sqlite)
     db_type: DatabaseType = field(
         default_factory=lambda: DatabaseType(os.getenv("DB_TYPE", "mysql").lower())
     )
@@ -62,17 +61,10 @@ class DatabaseConfig:
     # SSL configuration
     ssl_ca: Optional[str] = field(default_factory=lambda: os.getenv("DB_SSL_CA", os.getenv("MYSQL_SSL_CA", None)))
-    # SQLite-specific: path to database file
-    sqlite_path: str = field(default_factory=lambda: os.getenv("SQLITE_PATH", "./chatbot.db"))
     @property
     def connection_string(self) -> str:
         """Generate SQLAlchemy connection string based on database type."""
-        if self.db_type == DatabaseType.SQLITE:
-            # SQLite uses file path
-            return f"sqlite:///{self.sqlite_path}"
-        elif self.db_type == DatabaseType.POSTGRESQL:
             # PostgreSQL connection string
             base_url = f"postgresql+psycopg2://{self.username}:{self.password}@{self.host}:{self.port}/{self.database}"
             if self.ssl_ca:
@@ -88,12 +80,8 @@ class DatabaseConfig:
     def is_configured(self) -> bool:
         """Check if all required database settings are configured."""
-        if self.db_type == DatabaseType.SQLITE:
-            # SQLite only needs a valid path
-            return bool(self.sqlite_path)
-        else:
-            # MySQL/PostgreSQL need host, database, username, password
-            return all([self.host, self.database, self.username, self.password])
     @property
     def is_mysql(self) -> bool:
@@ -104,11 +92,6 @@ class DatabaseConfig:
     def is_postgresql(self) -> bool:
         """Check if using PostgreSQL."""
         return self.db_type == DatabaseType.POSTGRESQL
-    @property
-    def is_sqlite(self) -> bool:
-        """Check if using SQLite."""
-        return self.db_type == DatabaseType.SQLITE
 @dataclass
@@ -203,9 +186,7 @@ class RAGConfig:
         # MySQL types
         "TEXT", "MEDIUMTEXT", "LONGTEXT", "TINYTEXT", "VARCHAR", "CHAR",
         # PostgreSQL types
-        "CHARACTER VARYING", "CHARACTER",
-        # SQLite types (SQLite is flexible but these are common)
-        "CLOB", "NVARCHAR", "NCHAR"
     ])
     # Minimum character length to consider a column for RAG
@@ -257,10 +238,7 @@ class AppConfig:
         if not self.database.is_configured():
             db_type = self.database.db_type.value.upper()
-            if self.database.is_sqlite:
-                errors.append("SQLite configuration incomplete. Check SQLITE_PATH environment variable.")
-            else:
-                errors.append(f"{db_type} configuration incomplete. Check DB_* environment variables.")
         if not self.llm.is_configured():
             errors.append(

     """Supported database types."""
     MYSQL = "mysql"
     POSTGRESQL = "postgresql"
 class LLMProvider(Enum):
 @dataclass
 class DatabaseConfig:
     """
+    Database configuration supporting MySQL and PostgreSQL.
     All sensitive values are loaded from environment variables.
     """
+    # Database type (mysql, postgresql)
     db_type: DatabaseType = field(
         default_factory=lambda: DatabaseType(os.getenv("DB_TYPE", "mysql").lower())
     )
     # SSL configuration
     ssl_ca: Optional[str] = field(default_factory=lambda: os.getenv("DB_SSL_CA", os.getenv("MYSQL_SSL_CA", None)))
     @property
     def connection_string(self) -> str:
         """Generate SQLAlchemy connection string based on database type."""
+        if self.db_type == DatabaseType.POSTGRESQL:
             # PostgreSQL connection string
             base_url = f"postgresql+psycopg2://{self.username}:{self.password}@{self.host}:{self.port}/{self.database}"
             if self.ssl_ca:
     def is_configured(self) -> bool:
         """Check if all required database settings are configured."""
+        # MySQL/PostgreSQL need host, database, username, password
+        return all([self.host, self.database, self.username, self.password])
     @property
     def is_mysql(self) -> bool:
     def is_postgresql(self) -> bool:
         """Check if using PostgreSQL."""
         return self.db_type == DatabaseType.POSTGRESQL
 @dataclass
         # MySQL types
         "TEXT", "MEDIUMTEXT", "LONGTEXT", "TINYTEXT", "VARCHAR", "CHAR",
         # PostgreSQL types
+        "CHARACTER VARYING", "CHARACTER"
     ])
     # Minimum character length to consider a column for RAG
         if not self.database.is_configured():
             db_type = self.database.db_type.value.upper()
+            errors.append(f"{db_type} configuration incomplete. Check DB_* environment variables.")
         if not self.llm.is_configured():
             errors.append(

database/__pycache__/__init__.cpython-311.pyc CHANGED Viewed

Binary files a/database/__pycache__/__init__.cpython-311.pyc and b/database/__pycache__/__init__.cpython-311.pyc differ

database/__pycache__/connection.cpython-311.pyc CHANGED Viewed

Binary files a/database/__pycache__/connection.cpython-311.pyc and b/database/__pycache__/connection.cpython-311.pyc differ

database/__pycache__/schema_introspector.cpython-311.pyc CHANGED Viewed

Binary files a/database/__pycache__/schema_introspector.cpython-311.pyc and b/database/__pycache__/schema_introspector.cpython-311.pyc differ

database/connection.py CHANGED Viewed

@@ -52,26 +52,7 @@ class DatabaseConnection:
         """
         connect_args = {}
-        if self.config.db_type == DatabaseType.SQLITE:
-            # SQLite-specific settings
-            # Use StaticPool for SQLite to handle multi-threading
-            connect_args["check_same_thread"] = False
-            engine = create_engine(
-                self.config.connection_string,
-                poolclass=StaticPool,  # SQLite works best with StaticPool
-                connect_args=connect_args,
-                echo=False
-            )
-            # Enable foreign keys for SQLite
-            @event.listens_for(engine, "connect")
-            def set_sqlite_pragma(dbapi_connection, connection_record):
-                cursor = dbapi_connection.cursor()
-                cursor.execute("PRAGMA foreign_keys=ON")
-                cursor.close()
-        elif self.config.db_type == DatabaseType.POSTGRESQL:
             # PostgreSQL-specific settings
             if self.config.ssl_ca:
                 connect_args["sslmode"] = "verify-full"

         """
         connect_args = {}
+        if self.config.db_type == DatabaseType.POSTGRESQL:
             # PostgreSQL-specific settings
             if self.config.ssl_ca:
                 connect_args["sslmode"] = "verify-full"

database/schema_introspector.py CHANGED Viewed

@@ -42,9 +42,7 @@ class ColumnInfo:
             # MySQL
             'text', 'mediumtext', 'longtext', 'tinytext', 'varchar', 'char', 'json',
             # PostgreSQL
-            'character varying', 'character', 'text', 'json', 'jsonb',
-            # SQLite (column affinity - TEXT)
-            'clob', 'nvarchar', 'nchar', 'ntext'
         ]
         data_type_lower = self.data_type.lower().split('(')[0].strip()
         return data_type_lower in text_types
@@ -57,9 +55,7 @@ class ColumnInfo:
             'int', 'integer', 'bigint', 'smallint', 'tinyint',
             'decimal', 'numeric', 'float', 'double', 'real',
             # PostgreSQL specific
-            'double precision', 'serial', 'bigserial', 'smallserial',
-            # SQLite (NUMERIC affinity)
-            'bool', 'boolean'
         ]
         data_type_lower = self.data_type.lower().split('(')[0].strip()
         return data_type_lower in numeric_types
@@ -185,10 +181,10 @@ class SchemaIntrospector:
         '_chatbot_user_summaries',
         'schema_migrations',
         'flyway_schema_history',
-        # SQLite internal tables
-        'sqlite_sequence',
-        'sqlite_stat1',
-        'sqlite_stat4'
     }
     def __init__(self, engine: Optional[Engine] = None):
@@ -245,10 +241,7 @@ class SchemaIntrospector:
         db_type = self.db.db_type
         try:
-            if db_type.value == "sqlite":
-                # For SQLite, return the database file name
-                return self.db.config.sqlite_path.split('/')[-1]
-            elif db_type.value == "postgresql":
                 result = self.db.execute_query("SELECT current_database() as db_name")
                 return result[0]['db_name'] if result else "unknown"
             else:  # MySQL
@@ -266,18 +259,7 @@ class SchemaIntrospector:
         db_type = self.db.db_type
         try:
-            if db_type.value == "sqlite":
-                query = """
-                    SELECT name as table_name
-                    FROM sqlite_master
-                    WHERE type='table'
-                    AND name NOT LIKE 'sqlite_%'
-                    ORDER BY name
-                """
-                result = self.db.execute_query(query)
-                return [row['table_name'] for row in result]
-            elif db_type.value == "postgresql":
                 query = """
                     SELECT table_name
                     FROM information_schema.tables
@@ -351,24 +333,7 @@ class SchemaIntrospector:
         db_type = self.db.db_type
         try:
-            if db_type.value == "sqlite":
-                query = f"PRAGMA table_info('{table_name}')"
-                result = self.db.execute_query(query)
-                columns = []
-                for row in result:
-                    columns.append(ColumnInfo(
-                        name=row['name'],
-                        data_type=row['type'] or 'TEXT',  # SQLite columns can have no type
-                        is_nullable=row['notnull'] == 0,
-                        is_primary_key=row['pk'] == 1,
-                        max_length=None,
-                        default_value=row['dflt_value'],
-                        comment=None  # SQLite doesn't support column comments
-                    ))
-                return columns
-            elif db_type.value == "postgresql":
                 query = """
                     SELECT
                         column_name,
@@ -438,17 +403,12 @@ class SchemaIntrospector:
         db_type = self.db.db_type
         try:
-            if db_type.value == "sqlite":
-                query = f"PRAGMA table_info('{table_name}')"
-                result = self.db.execute_query(query)
-                return [row['name'] for row in result if row['pk'] > 0]
-            elif db_type.value == "postgresql":
                 query = """
                     SELECT a.attname as column_name
                     FROM pg_index i
                     JOIN pg_attribute a ON a.attrelid = i.indrelid AND a.attnum = ANY(i.indkey)
-                    WHERE i.indrelid = :table_name::regclass
                     AND i.indisprimary
                 """
                 result = self.db.execute_query(query, {"table_name": table_name})
@@ -475,15 +435,7 @@ class SchemaIntrospector:
         db_type = self.db.db_type
         try:
-            if db_type.value == "sqlite":
-                query = f"PRAGMA foreign_key_list('{table_name}')"
-                result = self.db.execute_query(query)
-                return {
-                    row['from']: f"{row['table']}.{row['to']}"
-                    for row in result
-                }
-            elif db_type.value == "postgresql":
                 query = """
                     SELECT
                         kcu.column_name,
@@ -534,13 +486,7 @@ class SchemaIntrospector:
         db_type = self.db.db_type
         try:
-            if db_type.value == "sqlite":
-                # SQLite doesn't have stats table, use max rowid for estimation
-                query = f"SELECT MAX(rowid) as row_count FROM \"{table_name}\""
-                result = self.db.execute_query(query)
-                return result[0]['row_count'] if result and result[0]['row_count'] else 0
-            elif db_type.value == "postgresql":
                 # Use pg_stat_user_tables for fast estimation
                 query = """
                     SELECT n_live_tup as row_count
@@ -569,13 +515,9 @@ class SchemaIntrospector:
         db_type = self.db.db_type
         try:
-            if db_type.value == "sqlite":
-                # SQLite doesn't support table comments
-                return None
-            elif db_type.value == "postgresql":
                 query = """
-                    SELECT obj_description(:table_name::regclass, 'pg_class') as table_comment
                 """
                 result = self.db.execute_query(query, {"table_name": table_name})
                 comment = result[0]['table_comment'] if result else None

             # MySQL
             'text', 'mediumtext', 'longtext', 'tinytext', 'varchar', 'char', 'json',
             # PostgreSQL
+            'character varying', 'character', 'text', 'json', 'jsonb'
         ]
         data_type_lower = self.data_type.lower().split('(')[0].strip()
         return data_type_lower in text_types
             'int', 'integer', 'bigint', 'smallint', 'tinyint',
             'decimal', 'numeric', 'float', 'double', 'real',
             # PostgreSQL specific
+            'double precision', 'serial', 'bigserial', 'smallserial'
         ]
         data_type_lower = self.data_type.lower().split('(')[0].strip()
         return data_type_lower in numeric_types
         '_chatbot_user_summaries',
         'schema_migrations',
         'flyway_schema_history',
+        # Vector store internal tables
+        'chunks',
+        'embeddings',
+        'vectors'
     }
     def __init__(self, engine: Optional[Engine] = None):
         db_type = self.db.db_type
         try:
+            if db_type.value == "postgresql":
                 result = self.db.execute_query("SELECT current_database() as db_name")
                 return result[0]['db_name'] if result else "unknown"
             else:  # MySQL
         db_type = self.db.db_type
         try:
+            if db_type.value == "postgresql":
                 query = """
                     SELECT table_name
                     FROM information_schema.tables
         db_type = self.db.db_type
         try:
+            if db_type.value == "postgresql":
                 query = """
                     SELECT
                         column_name,
         db_type = self.db.db_type
         try:
+            if db_type.value == "postgresql":
                 query = """
                     SELECT a.attname as column_name
                     FROM pg_index i
                     JOIN pg_attribute a ON a.attrelid = i.indrelid AND a.attnum = ANY(i.indkey)
+                    WHERE i.indrelid = CAST(:table_name AS regclass)
                     AND i.indisprimary
                 """
                 result = self.db.execute_query(query, {"table_name": table_name})
         db_type = self.db.db_type
         try:
+            if db_type.value == "postgresql":
                 query = """
                     SELECT
                         kcu.column_name,
         db_type = self.db.db_type
         try:
+            if db_type.value == "postgresql":
                 # Use pg_stat_user_tables for fast estimation
                 query = """
                     SELECT n_live_tup as row_count
         db_type = self.db.db_type
         try:
+            if db_type.value == "postgresql":
                 query = """
+                    SELECT obj_description(CAST(:table_name AS regclass), 'pg_class') as table_comment
                 """
                 result = self.db.execute_query(query, {"table_name": table_name})
                 comment = result[0]['table_comment'] if result else None

sql/__pycache__/__init__.cpython-311.pyc CHANGED Viewed

Binary files a/sql/__pycache__/__init__.cpython-311.pyc and b/sql/__pycache__/__init__.cpython-311.pyc differ

sql/__pycache__/generator.cpython-311.pyc CHANGED Viewed

Binary files a/sql/__pycache__/generator.cpython-311.pyc and b/sql/__pycache__/generator.cpython-311.pyc differ

sql/__pycache__/validator.cpython-311.pyc CHANGED Viewed

Binary files a/sql/__pycache__/validator.cpython-311.pyc and b/sql/__pycache__/validator.cpython-311.pyc differ

sql/generator.py CHANGED Viewed

@@ -16,8 +16,7 @@ def get_sql_dialect(db_type: str) -> str:
     """Get the SQL dialect name for the given database type."""
     dialects = {
         "mysql": "MySQL",
-        "postgresql": "PostgreSQL",
-        "sqlite": "SQLite"
     }
     return dialects.get(db_type, "SQL")

     """Get the SQL dialect name for the given database type."""
     dialects = {
         "mysql": "MySQL",
+        "postgresql": "PostgreSQL"
     }
     return dialects.get(db_type, "SQL")

sql/validator.py CHANGED Viewed

@@ -47,7 +47,7 @@ class SQLValidator:
     def set_allowed_tables(self, tables: List[str]):
         """Set the whitelist of allowed tables."""
         self.allowed_tables = set(tables)
     def validate(self, sql: str) -> Tuple[bool, str, Optional[str]]:
         """
         Validate SQL query for safety.
@@ -94,7 +94,11 @@ class SQLValidator:
         # Extract and validate tables
         tables = self._extract_tables(statement)
         if self.allowed_tables:
-            invalid_tables = tables - self.allowed_tables
             if invalid_tables:
                 return False, f"Access denied to tables: {invalid_tables}", None
@@ -109,13 +113,14 @@ class SQLValidator:
         sql = str(statement)
         # Use regex to find tables after FROM and JOIN
-        # Pattern: FROM table_name or JOIN table_name
         from_pattern = re.compile(
-            r'\bFROM\s+([a-zA-Z_][a-zA-Z0-9_]*)',
             re.IGNORECASE
         )
         join_pattern = re.compile(
-            r'\bJOIN\s+([a-zA-Z_][a-zA-Z0-9_]*)',
             re.IGNORECASE
         )
@@ -128,7 +133,7 @@ class SQLValidator:
             tables.add(match.group(1))
         return tables
     def _ensure_limit(self, sql: str) -> str:
         """Ensure the query has a LIMIT clause."""
         sql_upper = sql.upper()

     def set_allowed_tables(self, tables: List[str]):
         """Set the whitelist of allowed tables."""
         self.allowed_tables = set(tables)
     def validate(self, sql: str) -> Tuple[bool, str, Optional[str]]:
         """
         Validate SQL query for safety.
         # Extract and validate tables
         tables = self._extract_tables(statement)
         if self.allowed_tables:
+            # Normalize for comparison (remove quotes, lowercase)
+            allowed_norm = {t.lower().replace('"', '').replace('`', '') for t in self.allowed_tables}
+            tables_norm = {t.lower().replace('"', '').replace('`', '') for t in tables}
+            invalid_tables = tables_norm - allowed_norm
             if invalid_tables:
                 return False, f"Access denied to tables: {invalid_tables}", None
         sql = str(statement)
         # Use regex to find tables after FROM and JOIN
+        # Pattern: FROM table_name or JOIN table_name, supporting quotes
+        # Matches: FROM table, FROM "table", FROM `table`
         from_pattern = re.compile(
+            r'\bFROM\s+(?:["`]?)([a-zA-Z0-9_]+)(?:["`]?)',
             re.IGNORECASE
         )
         join_pattern = re.compile(
+            r'\bJOIN\s+(?:["`]?)([a-zA-Z0-9_]+)(?:["`]?)',
             re.IGNORECASE
         )
             tables.add(match.group(1))
         return tables
     def _ensure_limit(self, sql: str) -> str:
         """Ensure the query has a LIMIT clause."""
         sql_upper = sql.upper()