Upload 2 files
- app_gradio.py +37 -24
- model_loader.py +15 -4
app_gradio.py CHANGED
@@ -13,16 +13,27 @@ from dotenv import load_dotenv
 # Load environment variables
 load_dotenv()
 
-#
-
-
-
+# Global variables for lazy loading
+db_handler = None
+model_loader = None
+db_schema = None
 
-
-
-
-
-
+def initialize_components():
+    """Initialize model and database on first use (lazy loading)."""
+    global db_handler, model_loader, db_schema
+
+    if model_loader is None:
+        print(" Initializing model and database...")
+        db_path = os.getenv("DATABASE_PATH", "olist.sqlite")
+        adapter_path = os.getenv("ADAPTER_PATH", "mhdakmal80/Olist-SQL-Agent-Final")
+
+        db_handler = DatabaseHandler(db_path)
+        model_loader = FineTunedModelLoader(adapter_path=adapter_path)
+        db_schema = db_handler.get_schema()
+
+        print(" Model and database loaded!")
+
+    return db_handler, model_loader, db_schema
 
 # Example questions
 EXAMPLES = [
@@ -47,13 +58,16 @@ def generate_and_execute(question):
         Tuple of (sql_query, results_df, status_message)
     """
     if not question or not question.strip():
-        return "", None, "
+        return "", None, " Please enter a question"
+
+    # Initialize components on first use (lazy loading)
+    db_handler, model_loader, db_schema = initialize_components()
 
     # Generate SQL
     result = model_loader.generate_sql(question, db_schema)
 
     if not result['success']:
-        return "", None, f"
+        return "", None, f" SQL Generation Failed: {result['error']}"
 
     sql_query = result['sql']
 
@@ -61,15 +75,15 @@ def generate_and_execute(question):
     exec_result = db_handler.execute_query(sql_query)
 
     if not exec_result['success']:
-        return sql_query, None, f"
+        return sql_query, None, f" Query Execution Failed: {exec_result['error']}"
 
     # Format results
     df = exec_result['data']
     row_count = exec_result['row_count']
 
-    status = f"
+    status = f" Success! Retrieved {row_count} rows"
     if exec_result.get('warning'):
-        status += f"\n
+        status += f"\n {exec_result['warning']}"
 
     return sql_query, df, status
 
@@ -83,7 +97,7 @@ with gr.Blocks(title="Olist Text-to-SQL Agent", theme=gr.themes.Soft()) as demo:
 
     **Model**: Mistral-7B-Instruct-v0.2 fine-tuned with QLoRA on Olist e-commerce dataset
 
-
+    **Note**: Running on CPU - queries may take 30-60 seconds. For faster performance, the model supports GPU deployment.
     """)
 
     with gr.Row():
@@ -95,7 +109,7 @@ with gr.Blocks(title="Olist Text-to-SQL Agent", theme=gr.themes.Soft()) as demo:
             )
 
            with gr.Row():
-                submit_btn = gr.Button("
+                submit_btn = gr.Button(" Generate SQL & Execute", variant="primary")
                clear_btn = gr.ClearButton([question_input])
 
         with gr.Column(scale=1):
@@ -132,7 +146,7 @@ with gr.Blocks(title="Olist Text-to-SQL Agent", theme=gr.themes.Soft()) as demo:
     )
 
     # Info section
-    with gr.Accordion("
+    with gr.Accordion("ℹ About this app", open=False):
         gr.Markdown("""
         ### Model Details
         - **Base Model**: mistralai/Mistral-7B-Instruct-v0.2
@@ -152,13 +166,12 @@ with gr.Blocks(title="Olist Text-to-SQL Agent", theme=gr.themes.Soft()) as demo:
         - SQLite for database
         """)
 
-    with gr.Accordion("
-        gr.
-
-
-
-
-        )
+    with gr.Accordion("Database Schema", open=False):
+        gr.Markdown("""
+        The database schema will be loaded when you submit your first query.
+
+        **Tables**: orders, customers, products, sellers, payments, reviews, etc.
+        """)
 
     # Event handlers
     submit_btn.click(
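The main change above replaces eager, import-time initialization with initialize_components(), so the heavy objects (the 7B model, the database handle, the schema) are built on the first request instead of when the Space boots; presumably this lets the Gradio server come up before the long model load begins. A minimal, runnable sketch of the same pattern, with a hypothetical slow_load() standing in for the model and database setup:

import time

_resource = None  # module-level cache, None until the first request

def slow_load():
    """Hypothetical stand-in for the expensive work (model load, DB connect)."""
    time.sleep(2)
    return {"model": "ready"}

def get_resource():
    """Return the shared resource, creating it on first use only."""
    global _resource
    if _resource is None:
        print("First request: initializing...")
        _resource = slow_load()
    return _resource

get_resource()  # slow: performs the one-time initialization
get_resource()  # fast: returns the cached object

One caveat with this pattern: if two first requests arrive concurrently, both can observe None and initialize twice; guarding the check with a threading.Lock would close that race, and the diff above does not do so.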
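The diff cuts off at submit_btn.click(, so the event wiring is not visible. In Gradio, Button.click takes the callback plus the input and output components; below is a self-contained sketch of how generate_and_execute is presumably wired. The output component names (sql_output, results_output, status_output) and the stand-in callback are assumptions, not taken from the diff:

import gradio as gr

def fake_generate_and_execute(question):
    # Stand-in with the same (sql, dataframe, status) return shape.
    return f"SELECT 1 -- {question}", None, "ok"

with gr.Blocks() as demo:
    question_input = gr.Textbox(label="Ask a question about the Olist data")
    with gr.Row():
        submit_btn = gr.Button("Generate SQL & Execute", variant="primary")
        clear_btn = gr.ClearButton([question_input])
    sql_output = gr.Textbox(label="Generated SQL")
    results_output = gr.Dataframe(label="Results")
    status_output = gr.Markdown()

    submit_btn.click(
        fn=fake_generate_and_execute,          # returns (sql, df, status)
        inputs=question_input,
        outputs=[sql_output, results_output, status_output],
    )

demo.launch()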
model_loader.py CHANGED
@@ -31,25 +31,36 @@ class FineTunedModelLoader:
     def _load_model(self):
         """Load the base model and LoRA adapters."""
 
-        # Configure 4-bit quantization
-        if self.use_4bit:
+        # Check if GPU is available
+        has_gpu = torch.cuda.is_available()
+
+        if not has_gpu:
+            print(" ⚠️ No GPU detected - loading model on CPU (this will be slow)")
+            print(" ⚠️ Disabling 4-bit quantization (requires GPU)")
+            self.use_4bit = False  # Force disable 4-bit on CPU
+
+        # Configure 4-bit quantization only if GPU available
+        if self.use_4bit and has_gpu:
             bnb_config = BitsAndBytesConfig(
                 load_in_4bit=True,
                 bnb_4bit_quant_type="nf4",
                 bnb_4bit_compute_dtype=torch.bfloat16,
                 bnb_4bit_use_double_quant=False,
             )
+            print(" ✅ Using 4-bit quantization (GPU)")
         else:
             bnb_config = None
+            print(" ℹ️ Using float32 (CPU mode)")
 
         # Load base model
         print(f" Loading base model: {self.base_model_name}")
         base_model = AutoModelForCausalLM.from_pretrained(
             self.base_model_name,
-            quantization_config=bnb_config if self.use_4bit else None,
-            torch_dtype=torch.bfloat16,
+            quantization_config=bnb_config if (self.use_4bit and has_gpu) else None,
+            torch_dtype=torch.float32 if not has_gpu else torch.bfloat16,  # float32 for CPU
             device_map="auto",
             trust_remote_code=True,
+            low_cpu_mem_usage=True,  # Optimize CPU memory
         )
 
         # Load tokenizer
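This change makes loading device-aware: bitsandbytes 4-bit (NF4) kernels require CUDA, so on a CPU-only Space the loader now skips quantization and falls back to float32, and low_cpu_mem_usage=True streams weights in rather than materializing them twice. A standalone sketch of the same decision logic; sshleifer/tiny-gpt2 is a small public model used here as a quick-loading stand-in for Mistral-7B:

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

def load_with_fallback(model_name: str, use_4bit: bool = True):
    """Load a causal LM with 4-bit NF4 on GPU, plain float32 on CPU."""
    has_gpu = torch.cuda.is_available()
    if not has_gpu:
        use_4bit = False  # bitsandbytes 4-bit requires CUDA

    bnb_config = None
    if use_4bit:
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16,
        )

    return AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        torch_dtype=torch.bfloat16 if has_gpu else torch.float32,
        device_map="auto",
        low_cpu_mem_usage=True,  # stream weights instead of building them twice
    )

model = load_with_fallback("sshleifer/tiny-gpt2")
print(next(model.parameters()).dtype)  # torch.float32 on a CPU-only machine

At float32, a 7-billion-parameter model needs roughly 7B x 4 bytes, about 28 GB, for weights alone, so both the memory footprint and the inference latency are far worse on CPU, consistent with the 30-60 second warning added to the UI.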