Spaces:

nilotpaldhar2004
/

text2sql-chatbot

Running

App Files Files Community

nilotpaldhar2004 committed on 23 days ago

Commit

c53c8b6

unverified ·

1 Parent(s): af59526

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -93

app.py CHANGED Viewed

@@ -1,14 +1,6 @@
-"""
-app.py — Model: defog/sqlcoder-7b-2 (Text-to-SQL)
-HuggingFace Space: Free Tier  (needs GPU Space or patience on CPU)
-NOTE: 7B model — use HF Spaces with GPU (T4 small) if available.
-      On CPU it will be slow (~60-120s per query) but will work.
-"""
 import os
 import re
 import io
-import json
 import sqlite3
 import tempfile
 import pandas as pd
@@ -17,40 +9,44 @@ from fastapi.staticfiles import StaticFiles
 from fastapi.responses import FileResponse, JSONResponse
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
-from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 import torch
 # ── Config ────────────────────────────────────────────────────────────────────
 MODEL_NAME = "defog/sqlcoder-7b-2"
 MAX_NEW_TOKENS = 300
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-LOAD_IN_8BIT = False   # set True if bitsandbytes is available on GPU space
-# ── Load model once ────────────────────────────────────────────────────────────
-print(f"[INFO] Loading model: {MODEL_NAME}  |  device: {DEVICE}")
-print("[INFO] This may take a few minutes on first load...")
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-model_kwargs = {
-    "torch_dtype": torch.float16 if DEVICE == "cuda" else torch.float32,
-    "device_map": "auto" if DEVICE == "cuda" else None,
-    "low_cpu_mem_usage": True,
-}
-if LOAD_IN_8BIT and DEVICE == "cuda":
-    model_kwargs["load_in_8bit"] = True
-model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, **model_kwargs)
-if DEVICE == "cpu":
-    model = model.to(DEVICE)
 model.eval()
-print("[INFO] Model ready.")
 # ── In-memory store ────────────────────────────────────────────────────────────
 _db_store: dict[str, bytes] = {}
 _schema_store: dict[str, str] = {}
-app = FastAPI(title="CSV-to-SQL Chat (SQLCoder-7B)", version="1.0.0")
 app.add_middleware(
     CORSMiddleware,
@@ -59,13 +55,17 @@ app.add_middleware(
     allow_headers=["*"],
 )
 app.mount("/static", StaticFiles(directory="static"), name="static")
 @app.get("/")
 def root():
     return FileResponse("static/index.html")
 # ── Helpers ────────────────────────────────────────────────────────────────────
 def csv_to_sqlite(df: pd.DataFrame, table_name: str = "data") -> bytes:
     with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp:
@@ -78,7 +78,6 @@ def csv_to_sqlite(df: pd.DataFrame, table_name: str = "data") -> bytes:
     os.unlink(tmp_path)
     return db_bytes
 def get_schema(db_bytes: bytes) -> str:
     with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp:
         tmp.write(db_bytes)
@@ -91,9 +90,8 @@ def get_schema(db_bytes: bytes) -> str:
     os.unlink(tmp_path)
     return "\n".join(r[0] for r in rows if r[0])
 def build_prompt(question: str, schema: str) -> str:
-    """SQLCoder uses a specific prompt format."""
     return f"""### Task
 Generate a SQL query to answer [QUESTION]{question}[/QUESTION]
@@ -106,57 +104,35 @@ Given the database schema, here is the SQL query that [QUESTION]{question}[/QUES
 [SQL]
 """
 def generate_sql(question: str, schema: str) -> str:
-    # Extract table name from schema
     table_match = re.search(r'CREATE TABLE\s+"?(\w+)"?', schema, re.IGNORECASE)
-    table_name = table_match.group(1) if table_match else "data"
-    quoted = f'"{table_name}"'
     prompt = build_prompt(question, schema)
-    inputs = tokenizer(
-        prompt,
-        return_tensors="pt",
-        truncation=True,
-        max_length=1024,
-    ).to(DEVICE)
-    eos_token_id = tokenizer.eos_token_id
     with torch.no_grad():
         outputs = model.generate(
             **inputs,
             max_new_tokens=MAX_NEW_TOKENS,
-            num_beams=4,
-            early_stopping=True,
-            pad_token_id=eos_token_id,
         )
-    # Decode only newly generated tokens
     generated_ids = outputs[0][inputs["input_ids"].shape[1]:]
-    sql = tokenizer.decode(generated_ids, skip_special_tokens=True)
-    # Clean SQLCoder artifacts
     sql = sql.split("[/SQL]")[0].strip()
     sql = re.sub(r"```sql|```", "", sql).strip()
-    # Fix 1: replace any FROM/JOIN table reference with correct table
-    sql = re.sub(r'\bFROM\s+("?\w+"?)', f'FROM {quoted}', sql, flags=re.IGNORECASE)
-    sql = re.sub(r'\bJOIN\s+("?\w+"?)', f'JOIN {quoted}', sql, flags=re.IGNORECASE)
-    # Fix 2: strip junk tokens after table name
-    sql = re.sub(
-        r'(FROM\s+"?\w+"?)\s+(?!WHERE|LIMIT|ORDER|GROUP|HAVING|JOIN|LEFT|RIGHT|INNER|ON|AND|OR|\d)(\w+)',
-        r'\1',
-        sql, flags=re.IGNORECASE
-    )
-    # Fix 3: fallback if no SELECT
-    if not re.search(r'\bSELECT\b', sql, re.IGNORECASE):
-        sql = f'SELECT * FROM {quoted} LIMIT 10'
     return sql
 def execute_sql(sql: str, db_bytes: bytes) -> list[dict]:
     with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp:
         tmp.write(db_bytes)
@@ -167,62 +143,50 @@ def execute_sql(sql: str, db_bytes: bytes) -> list[dict]:
         cur = conn.execute(sql)
         rows = [dict(r) for r in cur.fetchall()]
     except Exception as e:
         conn.close()
         os.unlink(tmp_path)
-        raise HTTPException(status_code=400, detail=f"SQL error: {e}")
-    conn.close()
-    os.unlink(tmp_path)
     return rows
 # ── Routes ─────────────────────────────────────────────────────────────────────
 class QueryRequest(BaseModel):
     session_id: str
     question: str
 @app.post("/upload")
 async def upload_csv(file: UploadFile = File(...)):
     if not file.filename.endswith(".csv"):
-        raise HTTPException(status_code=400, detail="Only CSV files accepted.")
     contents = await file.read()
-    try:
-        df = pd.read_csv(io.BytesIO(contents))
-    except Exception as e:
-        raise HTTPException(status_code=400, detail=f"CSV parse error: {e}")
     session_id = os.urandom(8).hex()
-    table_name = re.sub(r"[^a-zA-Z0-9_]", "_", os.path.splitext(file.filename)[0])[:32] or "data"
-    if table_name[0].isdigit():
-        table_name = "t_" + table_name
     db_bytes = csv_to_sqlite(df, table_name)
     schema = get_schema(db_bytes)
     _db_store[session_id] = db_bytes
     _schema_store[session_id] = schema
-    preview = df.head(5).to_dict(orient="records")
-    columns = list(df.columns)
-    return JSONResponse({
         "session_id": session_id,
-        "table_name": table_name,
-        "columns": columns,
-        "row_count": len(df),
-        "preview": preview,
-        "schema": schema,
-    })
 @app.post("/query")
 async def query(req: QueryRequest):
     if req.session_id not in _db_store:
-        raise HTTPException(status_code=404, detail="Session not found. Upload CSV first.")
     schema = _schema_store[req.session_id]
     sql = generate_sql(req.question, schema)
     results = execute_sql(sql, _db_store[req.session_id])
-    return JSONResponse({"sql": sql, "results": results})
 @app.get("/health")
 def health():
-    return {"status": "ok", "model": MODEL_NAME, "device": DEVICE}

 import os
 import re
 import io
 import sqlite3
 import tempfile
 import pandas as pd
 from fastapi.responses import FileResponse, JSONResponse
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
+from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 import torch
 # ── Config ────────────────────────────────────────────────────────────────────
 # Hugging Face model id handed to from_pretrained() for both the tokenizer and the model.
 MODEL_NAME = "defog/sqlcoder-7b-2"
 # Upper bound on tokens produced per request (passed to generate() as max_new_tokens).
 MAX_NEW_TOKENS = 300
 # "cuda" when torch reports an available GPU, otherwise "cpu"; used below for the
 # log line and to pick the 4-bit compute dtype.
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+# ── Memory-Optimized Model Loading ───────────────────────────────────────────
 # Everything in this section runs at import time, so the module does not finish
 # importing until the tokenizer and model have been loaded.
+print(f"[INFO] Loading model: {MODEL_NAME} | device: {DEVICE}")
+print("[INFO] Applying 4-bit quantization to fit within 16Gi RAM limit...")
+# Configure 4-bit quantization for memory efficiency
 # NF4 quantization with double quantization; compute runs in float16 on CUDA and
 # float32 otherwise.
 # NOTE(review): bitsandbytes 4-bit loading has historically required a CUDA GPU —
 # confirm this path actually works on CPU-only hardware.
+quant_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_use_double_quant=True,
+    bnb_4bit_compute_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
+)
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+# Load model with quantization and low memory usage settings
 # device_map="auto" leaves weight placement to the library rather than DEVICE.
 # NOTE(review): trust_remote_code=True allows Python shipped in the model repo to
 # run locally — confirm it is required for this checkpoint.
+model = AutoModelForCausalLM.from_pretrained(
+    MODEL_NAME,
+    quantization_config=quant_config,
+    device_map="auto",
+    low_cpu_mem_usage=True,
+    trust_remote_code=True
+)
 # Switch to inference mode; the model is only used for generation here.
 model.eval()
+print("[INFO] Model loaded successfully.")
 # ── In-memory store ────────────────────────────────────────────────────────────
 # Serialized SQLite database bytes for each upload, keyed by session id.
 # NOTE(review): nothing visible in this file removes entries, so both stores
 # appear to grow with every upload — confirm whether eviction exists elsewhere.
 _db_store: dict[str, bytes] = {}
 # Schema text (as returned by get_schema) for each session, keyed by session id.
 _schema_store: dict[str, str] = {}
 # The FastAPI application object that all routes below are registered on.
+app = FastAPI(title="SQLCoder CSV Chat", version="1.1.0")
 app.add_middleware(
     CORSMiddleware,
     allow_headers=["*"],
 )
+# ── Static frontend ────────────────────────────────────────────────────────────
+# Ensure your index.html is in a folder named 'static'
+if not os.path.exists("static"):
+    os.makedirs("static")
 app.mount("/static", StaticFiles(directory="static"), name="static")
 @app.get("/")
 def root():
     """Serve static/index.html as the landing page."""
     index_page = "static/index.html"
     return FileResponse(index_page)
 # ── Helpers ────────────────────────────────────────────────────────────────────
 def csv_to_sqlite(df: pd.DataFrame, table_name: str = "data") -> bytes:
     with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp:
     os.unlink(tmp_path)
     return db_bytes
 def get_schema(db_bytes: bytes) -> str:
     with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp:
         tmp.write(db_bytes)
     os.unlink(tmp_path)
     return "\n".join(r[0] for r in rows if r[0])
 def build_prompt(question: str, schema: str) -> str:
+    """SQLCoder specific prompt format for better accuracy."""
     return f"""### Task
 Generate a SQL query to answer [QUESTION]{question}[/QUESTION]
 [SQL]
 """
 def _retarget_table_refs(sql: str, table_name: str) -> str:
     """Point every FROM/JOIN table reference in *sql* at *table_name*.

     Quoted and unquoted identifiers are both handled. Names introduced by a
     WITH clause are left untouched, because rewriting a reference to a CTE
     would change the meaning of an otherwise valid query.
     """
     cte_names = {
         name.strip('"').lower()
         for name in re.findall(
             r'(?:\bWITH\s+(?:RECURSIVE\s+)?|,\s*)("[^"]+"|\w+)\s+AS\s*\(',
             sql,
             flags=re.IGNORECASE,
         )
     }

     def _swap(match: re.Match) -> str:
         identifier = match.group(2).strip('"')
         if identifier.lower() in cte_names:
             return match.group(0)
         return f'{match.group(1)} "{table_name}"'

     # A subquery ("FROM (SELECT ...") does not match: "(" is not an identifier.
     return re.sub(r'\b(FROM|JOIN)\s+("[^"]+"|\w+)', _swap, sql, flags=re.IGNORECASE)


 def generate_sql(question: str, schema: str) -> str:
     """Generate a SQL query for *question* against the table described by *schema*.

     Args:
         question: Natural-language question from the user.
         schema: Schema text for the session database; the first CREATE TABLE
             name found in it is treated as the only real table.

     Returns:
         The cleaned SQL text produced by the model, with table references
         rewritten to the real table name.
     """
     table_match = re.search(r'CREATE TABLE\s+"?(\w+)"?', schema, re.IGNORECASE)
     table_name = table_match.group(1) if table_match else "user_data"

     prompt = build_prompt(question, schema)
     # NOTE(review): truncating to 1024 tokens may cut part of the prompt for very
     # wide schemas — confirm which side the tokenizer truncates.
     inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024).to(model.device)

     with torch.no_grad():
         outputs = model.generate(
             **inputs,
             max_new_tokens=MAX_NEW_TOKENS,
             # Greedy decoding: no sampling, single beam.
             do_sample=False,
             num_beams=1,
             eos_token_id=tokenizer.eos_token_id,
             # EOS doubles as the padding token.
             pad_token_id=tokenizer.eos_token_id,
         )

     # Keep only the tokens generated after the prompt.
     generated_ids = outputs[0][inputs["input_ids"].shape[1]:]
     sql = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

     # Drop everything after the closing marker and any markdown code fences.
     sql = sql.split("[/SQL]")[0].strip()
     sql = re.sub(r"```sql|```", "", sql).strip()

     # The model may invent table names; the database holds a single table, so
     # every real table reference is redirected to it.
     return _retarget_table_refs(sql, table_name)
 def execute_sql(sql: str, db_bytes: bytes) -> list[dict]:
     with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp:
         tmp.write(db_bytes)
         cur = conn.execute(sql)
         rows = [dict(r) for r in cur.fetchall()]
     except Exception as e:
+        raise HTTPException(status_code=400, detail=f"Execution error: {e}")
+    finally:
         conn.close()
         os.unlink(tmp_path)
     return rows
 # ── Routes ─────────────────────────────────────────────────────────────────────
 class QueryRequest(BaseModel):
     """Request body accepted by the POST /query route."""
     # Session identifier returned by POST /upload; used as the key into the session stores.
     session_id: str
     # Natural-language question that is turned into SQL.
     question: str
 @app.post("/upload")
 async def upload_csv(file: UploadFile = File(...)):
     """Load an uploaded CSV into a fresh SQLite database and open a session for it.

     Args:
         file: The uploaded file; its name must end in ".csv" (any letter case).

     Returns:
         A dict with the new ``session_id``, the CSV ``columns`` and a
         ``preview`` of the first three rows.

     Raises:
         HTTPException: 400 when the file is not named like a CSV or cannot be
             parsed as one.
     """
     # The filename may be absent; treat that the same as a wrong extension
     # instead of failing with an AttributeError.
     if not (file.filename or "").lower().endswith(".csv"):
         raise HTTPException(status_code=400, detail="Invalid file type. Upload a CSV.")

     contents = await file.read()
     try:
         df = pd.read_csv(io.BytesIO(contents))
     except ValueError as exc:
         # pandas' ParserError and EmptyDataError, as well as UnicodeDecodeError,
         # all derive from ValueError; report them as a client error, not a 500.
         raise HTTPException(status_code=400, detail=f"CSV parse error: {exc}") from exc

     session_id = os.urandom(8).hex()
     # Fixed table name so generated SQL can always be pointed at the same table.
     table_name = "user_data"
     db_bytes = csv_to_sqlite(df, table_name)
     schema = get_schema(db_bytes)
     _db_store[session_id] = db_bytes
     _schema_store[session_id] = schema

     # Missing cells come back from pandas as NaN, which is not valid JSON and
     # can make the response fail to serialize; send them as null instead.
     preview = [
         {column: (None if pd.isna(value) else value) for column, value in row.items()}
         for row in df.head(3).to_dict(orient="records")
     ]
     return {
         "session_id": session_id,
         "columns": list(df.columns),
         "preview": preview,
     }
 @app.post("/query")
 async def query(req: QueryRequest):
     """Turn the question into SQL, run it on the session database, and return both.

     Responds with 404 when the session id is not present in the database store.
     """
     session_key = req.session_id
     if session_key not in _db_store:
         raise HTTPException(status_code=404, detail="Session expired.")

     session_schema = _schema_store[session_key]
     generated = generate_sql(req.question, session_schema)
     rows = execute_sql(generated, _db_store[session_key])

     payload = {"sql": generated, "results": rows}
     return payload
 @app.get("/health")
 def health():
     """Return a fixed status payload: service state plus the quantization label."""
     status_report = {"status": "running", "quantization": "4-bit"}
     return status_report