diff --git a/__pycache__/avro_handler.cpython-313.pyc b/__pycache__/avro_handler.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..cf67fe6b7d7c8a7ae43d146c339c49a07888cc31
Binary files /dev/null and b/__pycache__/avro_handler.cpython-313.pyc differ
diff --git a/__pycache__/backend.cpython-313.pyc b/__pycache__/backend.cpython-313.pyc
index 1e3e92b97fec90689c864b4b11352e1b7dcaefb5..7ffacef767f3f24dc9fa3eacb3ac465420c2ffb0 100644
Binary files a/__pycache__/backend.cpython-313.pyc and b/__pycache__/backend.cpython-313.pyc differ
diff --git a/__pycache__/gliner_model.cpython-313.pyc b/__pycache__/gliner_model.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..351c2954d944308738f8d0d6bff180b09f729713
Binary files /dev/null and b/__pycache__/gliner_model.cpython-313.pyc differ
diff --git a/__pycache__/inspector.cpython-313.pyc b/__pycache__/inspector.cpython-313.pyc
index 46d94e65422cc69f0bbb2fa76bf065329106e604..934d2e918175606a797f8baff75f8eca0753aab0 100644
Binary files a/__pycache__/inspector.cpython-313.pyc and b/__pycache__/inspector.cpython-313.pyc differ
diff --git a/__pycache__/ocr_engine.cpython-313.pyc b/__pycache__/ocr_engine.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..57bcf8029888d5bf51aa9cece684a315e195ac51
Binary files /dev/null and b/__pycache__/ocr_engine.cpython-313.pyc differ
diff --git a/api.py b/api.py
new file mode 100644
index 0000000000000000000000000000000000000000..dd3acf24523d37e3d0963d1695cca0f4728cc2e2
--- /dev/null
+++ b/api.py
@@ -0,0 +1,176 @@
+# api.py
+from fastapi import FastAPI, UploadFile, File, Form, HTTPException
+from pydantic import BaseModel
+from typing import Optional, List
+import pandas as pd
+import io
+import json
+
+# Import your existing backend orchestrator
+# NOTE(review): this change set edits a top-level backend.py; confirm that
+# core.backend is the module path that actually resolves at runtime.
+from core.backend import RegexClassifier
+
+app = FastAPI(title="Segmento Sense API")
+
+# Shared classifier instance, built once at import time and reused by every request.
+backend = RegexClassifier()
+
+# Upload extensions that are sent through OCR rather than a tabular reader.
+IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg")
+
+# --- Pydantic Models for Requests ---
+class DbConnection(BaseModel):
+    """Connection details for a database scan."""
+    type: str # postgres, mysql, mongo
+    host: str
+    port: str
+    user: str
+    password: str
+    database: str
+    collection: Optional[str] = None  # table (SQL) or collection (Mongo) to sample
+
+class CloudConnection(BaseModel):
+    """Connection details for one object in cloud storage."""
+    service: str # aws, azure, gcp
+    key_1: str   # access_key or conn_string
+    key_2: Optional[str] = None # secret_key
+    region: Optional[str] = None
+    bucket: str
+    file_name: str
+
+class AppConnection(BaseModel):
+    """Connection details for an application scan."""
+    service: str # gmail, slack, confluence
+    token_or_path: str # token or credentials.json content
+    target: str # channel_id, page_id, or num_emails
+
+# --- HELPERS ---
+
+def _unstructured_response(raw_text: str) -> dict:
+    """Build the response body shared by the PDF and image branches."""
+    inspection = backend.run_full_inspection(raw_text)
+    matches = backend.analyze_text_hybrid(raw_text)
+    return {
+        "type": "unstructured",
+        "content": raw_text,
+        "matches": matches,
+        "stats": inspection.to_dict(orient="records")
+    }
+
+def _load_structured(filename: str, file_bytes: bytes) -> pd.DataFrame:
+    """Parse an uploaded tabular file into a DataFrame.
+
+    Raises:
+        HTTPException: 415 for an unsupported extension, 400 when the reader
+            rejects the payload with a ValueError.
+    """
+    try:
+        if filename.endswith(".csv"):
+            return pd.read_csv(io.BytesIO(file_bytes))
+        if filename.endswith(".json"):
+            return backend.get_json_data(io.BytesIO(file_bytes))
+        if filename.endswith(".parquet"):
+            return backend.get_parquet_data(file_bytes)
+        if filename.endswith(".avro"):
+            return backend.get_avro_data(file_bytes)
+    except ValueError as exc:
+        # pandas parser errors, JSON decode errors and bad encodings all derive from ValueError.
+        raise HTTPException(status_code=400, detail=f"Could not parse uploaded file: {exc}") from exc
+    raise HTTPException(status_code=415, detail=f"Unsupported file type: {filename!r}")
+
+# --- ENDPOINTS ---
+# NOTE(review): the backend calls below are synchronous and are not awaited, so
+# they run on the event loop for the duration of each async handler.
+
+@app.get("/")
+def health_check():
+    """Report that the service is up."""
+    return {"status": "Segmento Sense is running"}
+
+@app.post("/scan/file")
+async def scan_file(file: UploadFile = File(...)):
+    """
+    Handles PDF, CSV, JSON, Parquet, Avro, Image uploads.
+
+    PDFs (page 0 only) and images are scanned as free text; the other formats
+    are loaded into a DataFrame. Unsupported extensions get a 415 instead of
+    being reported as an empty, PII-free table.
+    """
+    file_bytes = await file.read()
+    # UploadFile.filename is optional; a missing name is treated as "no extension".
+    filename = (file.filename or "").lower()
+
+    if filename.endswith(".pdf"):
+        # For demo, scan page 0
+        return _unstructured_response(backend.get_pdf_page_text(file_bytes, 0))
+    if filename.endswith(IMAGE_EXTENSIONS):
+        return _unstructured_response(backend.get_ocr_text_from_image(file_bytes))
+
+    df = _load_structured(filename, file_bytes)
+    pii_counts = backend.get_pii_counts_dataframe(df)
+    masked_preview = backend.mask_dataframe(df.head(20))
+    return {
+        "type": "structured",
+        "pii_counts": pii_counts.to_dict(orient="records"),
+        "preview": masked_preview.to_dict(orient="records"),
+        "schema": backend.get_data_schema(df).to_dict(orient="records")
+    }
+
+@app.post("/scan/database")
+async def scan_db(conn: DbConnection):
+    """Sample a table or collection and report PII counts plus a masked preview.
+
+    Raises:
+        HTTPException: 400 for an unknown ``type`` or a missing ``collection``,
+            404 when the backend returns no rows.
+    """
+    fetchers = {
+        "postgres": backend.get_postgres_data,
+        "mysql": backend.get_mysql_data,
+        "mongo": backend.get_mongodb_data,
+    }
+    fetch = fetchers.get(conn.type)
+    if fetch is None:
+        raise HTTPException(status_code=400, detail=f"Unsupported database type: {conn.type!r}")
+    if not conn.collection:
+        # All three readers are called with the table/collection name as their last argument.
+        raise HTTPException(status_code=400, detail="'collection' (table or collection name) is required")
+    # NOTE(review): conn.collection is caller-controlled; confirm the SQL handlers quote or validate it.
+    df = fetch(conn.host, conn.port, conn.database, conn.user, conn.password, conn.collection)
+
+    if df.empty:
+        raise HTTPException(status_code=404, detail="Connection failed or no data found")
+
+    pii_counts = backend.get_pii_counts_dataframe(df)
+    return {
+        "source": conn.type,
+        "pii_counts": pii_counts.to_dict(orient="records"),
+        "preview": backend.mask_dataframe(df.head(10)).to_dict(orient="records")
+    }
+
+@app.post("/scan/app")
+async def scan_app(conn: AppConnection):
+    """Fetch messages from a supported app and report PII counts plus a masked preview.
+
+    Raises:
+        HTTPException: 501 for Confluence (not wired up yet), 400 for any other
+            unsupported service or when no data comes back.
+    """
+    if conn.service == "slack":
+        df = backend.get_slack_messages(conn.token_or_path, conn.target)
+    elif conn.service == "confluence":
+        # Split target "url|user|page_id" if needed or adjust model
+        raise HTTPException(status_code=501, detail="Confluence scanning is not implemented yet")
+    else:
+        raise HTTPException(status_code=400, detail=f"Unsupported app service: {conn.service!r}")
+
+    if df.empty:
+        raise HTTPException(status_code=400, detail="No data fetched")
+
+    pii_counts = backend.get_pii_counts_dataframe(df)
+    return {
+        "source": conn.service,
+        "pii_counts": pii_counts.to_dict(orient="records"),
+        "preview": backend.mask_dataframe(df.head(10)).to_dict(orient="records")
+    }
\ No newline at end of file
diff --git a/backend.py b/backend.py
index 42f508af3ed633e27079b6a82607ed9a653a88e5..19c2776185ca84ceee9de9b3dee63417b577a7bc 100644
--- a/backend.py
+++ b/backend.py
@@ -1,68 +1,66 @@
-# backend.py
 import re
 import json
 import pandas as pd
 import fitz  # PyMuPDF
 import nltk
 import io
+import os
+import pickle
+import base64
 from typing import Dict, List, Any
 from sqlalchemy import create_engine
 from urllib.parse import quote_plus
-
-# --- IMPORT MODULES ---
-from spacy_model import PiiSpacyAnalyzer
-from presidio_model import PiiPresidioAnalyzer
-from inspector import ModelInspector
+from bs4 import BeautifulSoup 
+
+# --- IMPORT CLASSIFIERS ---
+from classifier_manager.spacy_model import PiiSpacyAnalyzer
+from classifier_manager.presidio_model import PiiPresidioAnalyzer
+from classifier_manager.gliner_model import PiiGlinerAnalyzer
+from classifier_manager.inspector import ModelInspector
+
+# --- IMPORT FILE HANDLERS ---
+from file_handlers.ocr_engine import OcrEngine
+from file_handlers.avro_handler import AvroHandler
+from file_handlers.parquet_handler import ParquetHandler
+from file_handlers.json_handler import JsonHandler
+from file_handlers.pdf_handler import PdfHandler
+
+# --- IMPORT CONNECTORS ---
+from connectors.postgres_handler import PostgresHandler
+from connectors.mysql_handler import MysqlHandler
+from connectors.gmail_handler import GmailHandler
+from connectors.drive_handler import DriveHandler
+from connectors.aws_s3_handler import S3Handler
+from connectors.azure_handler import AzureBlobHandler
+from connectors.gcp_storage_handler import GcpStorageHandler
+from connectors.slack_handler import SlackHandler           # <--- NEW
+from connectors.confluence_handler import ConfluenceHandler # <--- NEW
 
 # --- DEPENDENCY CHECKS ---
 try:
     from googleapiclient.discovery import build
-    from googleapiclient.http import MediaIoBaseDownload
-    from google.oauth2 import service_account
     GOOGLE_AVAILABLE = True
 except ImportError:
     GOOGLE_AVAILABLE = False
-    print("Google Drive Libraries not installed.")
-
+    print("Google Libraries not installed.")
 try:
     import pymongo
     MONGO_AVAILABLE = True
-except ImportError:
-    MONGO_AVAILABLE = False
-    print("PyMongo not installed.")
-
-try:
-    import pyarrow
-    PARQUET_AVAILABLE = True
-except ImportError:
-    PARQUET_AVAILABLE = False
-    print("PyArrow not installed.")
-
+except ImportError: MONGO_AVAILABLE = False  # optional dependency: only a missing package disables the connector
 try:
     import boto3
     AWS_AVAILABLE = True
-except ImportError:
-    AWS_AVAILABLE = False
-    print("Boto3 not installed.")
-
+except ImportError: AWS_AVAILABLE = False
 try:
     from azure.storage.blob import BlobServiceClient
     AZURE_AVAILABLE = True
-except ImportError:
-    AZURE_AVAILABLE = False
-    print("Azure Storage Blob not installed.")
-
-# --- GCP STORAGE IMPORT (NEW) ---
+except ImportError: AZURE_AVAILABLE = False
 try:
     from google.cloud import storage
-    # We reuse google.oauth2.service_account if available, else import it
-    from google.oauth2 import service_account as gcp_service_account
     GCS_AVAILABLE = True
-except ImportError:
-    GCS_AVAILABLE = False
-    print("Google Cloud Storage library not installed.")
+except ImportError: GCS_AVAILABLE = False
 
-# --- NLTK SETUP ---
+# NLTK Setup
 try:
     nltk.data.find('tokenizers/punkt')
 except LookupError:
@@ -75,10 +73,9 @@ except LookupError:
 class RegexClassifier:
     def __init__(self):
         self.colors = {
-            "EMAIL": (136, 238, 255), "FIRST_NAME": (170, 255, 170), "LAST_NAME": (170, 255, 170),
-            "PHONE": (255, 170, 170), "SSN": (255, 204, 170), "CREDIT_CARD": (255, 238, 170),
-            "LOCATION": (200, 170, 255), "AADHAAR_IND": (255, 150, 255), "ORG": (255, 255, 150), 
-            "DEFAULT": (224, 224, 224)
+            "EMAIL": "#8ef", "FIRST_NAME": "#af9", "LAST_NAME": "#af9",
+            "PHONE": "#faa", "SSN": "#fca", "CREDIT_CARD": "#fea",
+            "LOCATION": "#dcf", "ORG": "#ffecb3", "DEFAULT": "#e0e0e0"
         }
         
         self.patterns: Dict[str, str] = {
@@ -90,69 +87,121 @@ class RegexClassifier:
             "PAN_IND": r"\b[A-Z]{5}\d{4}[A-Z]{1}\b",
         }
 
+        # 1. Classifiers
         self.spacy_analyzer = PiiSpacyAnalyzer()
         self.presidio_analyzer = PiiPresidioAnalyzer()
+        self.gliner_analyzer = PiiGlinerAnalyzer()
         self.inspector = ModelInspector()
+        
+        # 2. File Handlers
+        self.ocr_engine = OcrEngine()
+        self.avro_handler = AvroHandler()
+        self.parquet_handler = ParquetHandler()
+        self.json_handler = JsonHandler()
+        self.pdf_handler = PdfHandler(self.ocr_engine)
+
+        # 3. Connectors
+        self.pg_handler = PostgresHandler()
+        self.mysql_handler = MysqlHandler()
+        self.gmail_handler = GmailHandler()
+        self.drive_handler = DriveHandler()
+        self.s3_handler = S3Handler()
+        self.azure_handler = AzureBlobHandler()
+        self.gcp_handler = GcpStorageHandler()
+        self.slack_handler = SlackHandler()           # <--- Init
+        self.confluence_handler = ConfluenceHandler() # <--- Init
 
     def list_patterns(self): return self.patterns
     def add_pattern(self, n, r): self.patterns[n.upper()] = r
     def remove_pattern(self, n): self.patterns.pop(n.upper(), None)
 
-    # --- DETECTION ENGINES ---
+    # --- CORE ANALYSIS ---
     def scan_with_regex(self, text: str) -> List[dict]:
         matches = []
         for label, regex in self.patterns.items():
-            for match in re.finditer(regex, text):
-                matches.append({"label": label, "text": match.group(), "start": match.start(), "end": match.end()})
+            # One record per hit; "source" names the detector that produced it.
+            matches.extend({"label": label, "text": hit.group(), "start": hit.start(), "end": hit.end(), "source": "Regex"} for hit in re.finditer(regex, text))
         return matches
 
     def scan_with_nltk(self, text: str) -> List[dict]:
+        """Return NLTK PERSON/GPE chunks as match dicts, locating each one left to right so repeats get their own offsets."""
         detections = []
         try:
-            tokens = nltk.word_tokenize(text)
-            chunked = nltk.ne_chunk(nltk.pos_tag(tokens), binary=False)
-            current_pos = 0 
-            for chunk in chunked:
+            cursor = 0  # where the next search starts; moves past every located chunk
+            for chunk in nltk.ne_chunk(nltk.pos_tag(nltk.word_tokenize(text))):
                 if hasattr(chunk, 'label') and chunk.label() in ['PERSON', 'GPE']:
                     val = " ".join(c[0] for c in chunk)
-                    start_idx = text.find(val, current_pos)
-                    label = "LOCATION" if chunk.label() == 'GPE' else "FIRST_NAME" 
-                    if start_idx != -1:
-                        detections.append({"label": label, "text": val, "start": start_idx, "end": start_idx + len(val)})
-                        current_pos = start_idx + len(val)
+                    start = text.find(val, cursor)
+                    if start != -1:
+                        cursor = start + len(val)
+                        detections.append({"label": "LOCATION" if chunk.label() == 'GPE' else "FIRST_NAME", "text": val, "start": start, "end": cursor, "source": "NLTK"})
         except: pass 
         return detections
 
     def analyze_text_hybrid(self, text: str) -> List[dict]:
+        """Merge the hits of every detector into one start-ordered list with overlapping spans collapsed."""
+        if not text: return []
         all_matches = []
         all_matches.extend(self.scan_with_regex(text))
         all_matches.extend(self.scan_with_nltk(text))
         all_matches.extend(self.spacy_analyzer.scan(text))
         all_matches.extend(self.presidio_analyzer.scan(text))
+        all_matches.extend(self.gliner_analyzer.scan(text))
         
         all_matches.sort(key=lambda x: x['start'])
-        
-        unique_matches = []
+        kept = []
         if not all_matches: return []
         curr = all_matches[0]
-        for next_match in all_matches[1:]:
-            if next_match['start'] < curr['end']:
-                if len(next_match['text']) > len(curr['text']):
-                    curr = next_match
+        for candidate in all_matches[1:]:
+            if candidate['start'] < curr['end']:
+                if len(candidate['text']) > len(curr['text']): curr = candidate  # overlap: the longer text wins, ties keep the earlier hit
             else:
-                unique_matches.append(curr)
-                curr = next_match
-        unique_matches.append(curr)
-        return unique_matches
-
-    def run_full_inspection(self, text: str) -> pd.DataFrame:
-        r_matches = self.scan_with_regex(text)
-        n_matches = self.scan_with_nltk(text)
-        s_matches = self.spacy_analyzer.scan(text)
-        p_matches = self.presidio_analyzer.scan(text)
-        return self.inspector.compare_models(r_matches, n_matches, s_matches, p_matches)
-
-    # --- SUMMARY & VISUALS ---
+                kept.append(curr)
+                curr = candidate
+        kept.append(curr)
+        return kept
+
+    def run_full_inspection(self, text: str):
+        """Run all five detectors on ``text`` and return the inspector's comparison of their hits."""
+        # compare_models receives the result lists positionally, so the order here matters.
+        detectors = (
+            self.scan_with_regex, self.scan_with_nltk, self.spacy_analyzer.scan,
+            self.presidio_analyzer.scan, self.gliner_analyzer.scan,
+        )
+        return self.inspector.compare_models(*(detect(text) for detect in detectors))
+
+    # --- WRAPPERS FOR UI ---
+    def get_json_data(self, file_obj) -> pd.DataFrame:  # file_obj: file-like JSON source handed to JsonHandler.read_file
+        return self.json_handler.read_file(file_obj)
+
+    def get_pdf_page_text(self, file_bytes, page_num):  # delegates to PdfHandler.get_page_text
+        return self.pdf_handler.get_page_text(file_bytes, page_num)
+
+    def get_pdf_total_pages(self, file_bytes) -> int:  # delegates to PdfHandler.get_total_pages
+        return self.pdf_handler.get_total_pages(file_bytes)
+
+    def get_labeled_pdf_image(self, file_bytes, page_num):  # page text -> hybrid matches -> PdfHandler.render_labeled_image
+        text = self.get_pdf_page_text(file_bytes, page_num)
+        matches = self.analyze_text_hybrid(text)
+        return self.pdf_handler.render_labeled_image(file_bytes, page_num, matches, self.colors)  # NOTE(review): self.colors now holds hex strings, while the removed inline renderer divided RGB tuples by 255 - confirm the handler accepts hex
+
+    def get_avro_data(self, file_bytes) -> pd.DataFrame:  # delegates to AvroHandler.convert_to_dataframe
+        return self.avro_handler.convert_to_dataframe(file_bytes)
+    
+    def get_parquet_data(self, file_bytes) -> pd.DataFrame:  # delegates to ParquetHandler.convert_to_dataframe
+        return self.parquet_handler.convert_to_dataframe(file_bytes)
+        
+    def get_ocr_text_from_image(self, file_bytes) -> str:  # delegates to OcrEngine.extract_text
+        return self.ocr_engine.extract_text(file_bytes)
+
+    def get_pii_counts_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
+        """Count PII hits per label across every cell of ``df``.
+
+        All cells are stringified and space-joined into one text, which is then
+        tallied by ``get_pii_counts`` so both entry points share one counting routine.
+        """
+        return self.get_pii_counts(" ".join(df.astype(str).values.flatten()))
+    
     def get_pii_counts(self, text: str) -> pd.DataFrame:
         matches = self.analyze_text_hybrid(str(text))
         if not matches: return pd.DataFrame(columns=["PII Type", "Count"])
@@ -160,261 +209,78 @@ class RegexClassifier:
         for m in matches: counts[m['label']] = counts.get(m['label'], 0) + 1
         return pd.DataFrame(list(counts.items()), columns=["PII Type", "Count"])
 
-    def get_pii_counts_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
-        full_text = " ".join(df.astype(str).values.flatten())
-        return self.get_pii_counts(full_text)
-
-    def mask_pii(self, text: str) -> str:
-        text = str(text)
-        matches = self.analyze_text_hybrid(text)
-        matches.sort(key=lambda x: x['start'], reverse=True)
-        for m in matches:
-            masked_val = "******"
-            if "<span" not in text[m['start']:m['end']]:
-                text = text[:m['start']] + masked_val + text[m['end']:]
-        return text
-
     def mask_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
-        def safe_mask(val):
-            if isinstance(val, (list, dict, tuple, set)): return self.mask_pii(str(val))
-            if pd.isna(val): return val
-            return self.mask_pii(str(val))
-        return df.map(safe_mask)
-
-    def get_labeled_pdf_image(self, file_bytes, page_num: int):
-        try:
-            doc = fitz.open(stream=file_bytes, filetype="pdf")
-            if not (0 <= page_num < len(doc)): return None
-            page = doc[page_num]
-            text = page.get_text("text")
+        """Return a new frame in which every detected PII span of each str/int/float cell is replaced by "******"."""
+        def redact(text):
             matches = self.analyze_text_hybrid(text)
+            matches.sort(key=lambda x: x['start'], reverse=True)
             for m in matches:
-                color_norm = tuple(c/255 for c in self.colors.get(m['label'], self.colors["DEFAULT"]))
-                quads = page.search_for(m['text'])
-                for quad in quads:
-                    page.draw_rect(quad, color=color_norm, fill=color_norm, fill_opacity=0.4)
-                    page.insert_text(fitz.Point(quad.x0, quad.y0-2), m['label'], fontsize=6, color=(0,0,0))
-            return page.get_pixmap(matrix=fitz.Matrix(2, 2)).tobytes("png")
-        except: return None
+                lo, hi = m['start'], m['end']
+                if "***" not in text[lo:hi]: text = text[:lo] + "******" + text[hi:]
+            return text
+        return df.map(lambda x: redact(str(x)) if isinstance(x, (str, int, float)) else x)
 
     def scan_dataframe_with_html(self, df: pd.DataFrame) -> pd.DataFrame:
-        def highlight_html(text):
+        def highlight(text):  # wraps each detected span of one cell in a coloured <span>
             text = str(text)
             matches = self.analyze_text_hybrid(text)
             matches.sort(key=lambda x: x['start'], reverse=True)
-            hex_map = {"EMAIL": "#8ef", "PHONE": "#faa", "SSN": "#fca", "CREDIT_CARD": "#fea", "FIRST_NAME": "#af9", "LAST_NAME": "#af9", "LOCATION": "#dcf", "AADHAAR_IND": "#f9f", "ORG": "#ffecb3", "DEFAULT": "#e0e0e0"}
             for m in matches:
                 if "<span" in text[m['start']:m['end']]: continue
-                color = hex_map.get(m['label'], "#e0e0e0")
-                tag = f'<span style="background-color: {color}; padding: 0 2px; border-radius: 3px; border: 1px solid #ccc;">{m["text"]}</span>'
-                text = text[:m['start']] + tag + text[m['end']:]
+                color = self.colors.get(m['label'], self.colors["DEFAULT"])  # unknown labels fall back to the DEFAULT colour
+                replacement = f'<span style="background:{color}; padding:2px; border-radius:4px;">{m["text"]}</span>'  # NOTE(review): m["text"] and the surrounding cell text are emitted without HTML escaping
+                text = text[:m['start']] + replacement + text[m['end']:]  # matches are handled right to left, so earlier offsets stay valid
             return text
-        def safe_highlight(val):
-             if isinstance(val, (list, dict)): return highlight_html(str(val))
-             if pd.isna(val): return val
-             return highlight_html(val)
-        return df.map(safe_highlight)
-
-    def get_data_schema(self, df: pd.DataFrame) -> pd.DataFrame:
-        if df.empty: return pd.DataFrame(columns=["Column", "Type", "Sample"])
-        schema_info = []
-        for col in df.columns:
-            d_type = str(df[col].dtype)
-            first_valid_idx = df[col].first_valid_index()
-            sample_val = str(df[col].loc[first_valid_idx]) if first_valid_idx is not None else "All Null"
-            if len(sample_val) > 50: sample_val = sample_val[:47] + "..."
-            schema_info.append({"Column Name": col, "Data Type": d_type, "Sample Value": sample_val})
-        return pd.DataFrame(schema_info)
-
-    # --- SQL/MONGO/DRIVE/S3/AZURE CONNECTORS ---
+        return df.map(lambda x: highlight(x) if isinstance(x, str) else x)  # only str cells are highlighted; other values pass through unchanged
+
+    def get_data_schema(self, df):  # one row per column of df: its name ("Column") and its dtype as text ("Type")
+        return pd.DataFrame({"Column": df.columns, "Type": df.dtypes.astype(str)})
+
+    # --- CONNECTOR WRAPPERS ---
     def get_postgres_data(self, host, port, db, user, pw, table):
-        safe_pw = quote_plus(pw)
-        conn_str = f"postgresql://{user}:{safe_pw}@{host}:{port}/{db}"
-        engine = create_engine(conn_str)
-        return pd.read_sql(f"SELECT * FROM {table} LIMIT 100", engine)
+        return self.pg_handler.fetch_data(host, port, db, user, pw, table)  # NOTE(review): the removed code interpolated `table` into the SQL text; confirm PostgresHandler.fetch_data quotes or validates it
 
     def get_mysql_data(self, host, port, db, user, pw, table):
-        safe_pw = quote_plus(pw)
-        conn_str = f"mysql+pymysql://{user}:{safe_pw}@{host}:{port}/{db}"
-        engine = create_engine(conn_str)
-        return pd.read_sql(f"SELECT * FROM {table} LIMIT 100", engine)
+        return self.mysql_handler.fetch_data(host, port, db, user, pw, table)  # NOTE(review): the removed code interpolated `table` into the SQL text; confirm MysqlHandler.fetch_data quotes or validates it
 
-    def get_mongodb_data(self, host, port, db, user, pw, collection):
-        if not MONGO_AVAILABLE: return pd.DataFrame()
-        try:
-            if user and pw:
-                safe_user = quote_plus(user)
-                safe_pw = quote_plus(pw)
-                uri = f"mongodb://{safe_user}:{safe_pw}@{host}:{port}/"
-            else:
-                uri = f"mongodb://{host}:{port}/"
-            client = pymongo.MongoClient(uri, serverSelectionTimeoutMS=5000)
-            database = client[db]
-            col = database[collection]
-            cursor = col.find().limit(100)
-            data_list = list(cursor)
-            if not data_list: return pd.DataFrame()
-            for doc in data_list:
-                if '_id' in doc: doc['_id'] = str(doc['_id'])
-            return pd.json_normalize(data_list)
-        except Exception as e:
-            print(f"Mongo Error: {e}")
-            raise e
+    def get_gmail_data(self, credentials_file, num_emails=10) -> pd.DataFrame:  # delegates to GmailHandler.fetch_emails
+        return self.gmail_handler.fetch_emails(credentials_file, num_emails)
 
     def get_google_drive_files(self, credentials_dict):
-        if not GOOGLE_AVAILABLE: return []
-        try:
-            SCOPES = ['https://www.googleapis.com/auth/drive.readonly']
-            creds = service_account.Credentials.from_service_account_info(credentials_dict, scopes=SCOPES)
-            service = build('drive', 'v3', credentials=creds)
-            return service.files().list(pageSize=15, fields="files(id, name, mimeType)").execute().get('files', [])
-        except Exception as e:
-            return []
+        return self.drive_handler.list_files(credentials_dict)  # NOTE(review): the removed inline version returned [] on any failure; confirm DriveHandler.list_files keeps that contract
 
     def download_drive_file(self, file_id, mime_type, credentials_dict):
-        if not GOOGLE_AVAILABLE: return b""
-        try:
-            SCOPES = ['https://www.googleapis.com/auth/drive.readonly']
-            creds = service_account.Credentials.from_service_account_info(credentials_dict, scopes=SCOPES)
-            service = build('drive', 'v3', credentials=creds)
-            if "spreadsheet" in mime_type: request = service.files().export_media(fileId=file_id, mimeType='text/csv')
-            elif "document" in mime_type: request = service.files().export_media(fileId=file_id, mimeType='application/pdf')
-            elif "presentation" in mime_type: request = service.files().export_media(fileId=file_id, mimeType='application/pdf')
-            else: request = service.files().get_media(fileId=file_id)
-            fh = io.BytesIO()
-            downloader = MediaIoBaseDownload(fh, request)
-            done = False
-            while done is False: status, done = downloader.next_chunk()
-            return fh.getvalue()
-        except: return b""
-
-    def get_s3_buckets(self, access_key, secret_key, region):
-        if not AWS_AVAILABLE: return []
-        try:
-            s3 = boto3.client('s3', aws_access_key_id=access_key, aws_secret_access_key=secret_key, region_name=region)
-            response = s3.list_buckets()
-            return [b['Name'] for b in response.get('Buckets', [])]
-        except Exception as e:
-            print(f"S3 Error: {e}")
-            return []
-
-    def get_s3_files(self, access_key, secret_key, region, bucket_name):
-        if not AWS_AVAILABLE: return []
-        try:
-            s3 = boto3.client('s3', aws_access_key_id=access_key, aws_secret_access_key=secret_key, region_name=region)
-            response = s3.list_objects_v2(Bucket=bucket_name)
-            return [obj['Key'] for obj in response.get('Contents', [])]
-        except Exception as e:
-            return []
-
-    def download_s3_file(self, access_key, secret_key, region, bucket_name, file_key):
-        if not AWS_AVAILABLE: return b""
-        try:
-            s3 = boto3.client('s3', aws_access_key_id=access_key, aws_secret_access_key=secret_key, region_name=region)
-            obj = s3.get_object(Bucket=bucket_name, Key=file_key)
-            return obj['Body'].read()
-        except Exception as e:
-            return b""
-
-    def get_azure_containers(self, conn_str):
-        if not AZURE_AVAILABLE: return []
-        try:
-            blob_service_client = BlobServiceClient.from_connection_string(conn_str)
-            containers = blob_service_client.list_containers()
-            return [c['name'] for c in containers]
-        except Exception as e:
-            print(f"Azure Error: {e}")
-            return []
-
-    def get_azure_blobs(self, conn_str, container_name):
-        if not AZURE_AVAILABLE: return []
-        try:
-            blob_service_client = BlobServiceClient.from_connection_string(conn_str)
-            container_client = blob_service_client.get_container_client(container_name)
-            blobs = container_client.list_blobs()
-            return [b['name'] for b in blobs]
-        except Exception as e:
-            return []
-
-    def download_azure_blob(self, conn_str, container_name, blob_name):
-        if not AZURE_AVAILABLE: return b""
-        try:
-            blob_service_client = BlobServiceClient.from_connection_string(conn_str)
-            blob_client = blob_service_client.get_blob_client(container=container_name, blob=blob_name)
-            return blob_client.download_blob().readall()
-        except Exception as e:
-            return b""
-
-    # --- GCP BUCKET CONNECTORS (NEW) ---
-    def get_gcs_buckets(self, credentials_dict):
-        """Lists all GCS buckets for the given service account credentials."""
-        if not GCS_AVAILABLE: return []
-        try:
-            # Create credentials object
-            credentials = gcp_service_account.Credentials.from_service_account_info(credentials_dict)
-            # Create storage client
-            storage_client = storage.Client(credentials=credentials, project=credentials_dict.get('project_id'))
-            
-            buckets = storage_client.list_buckets()
-            return [bucket.name for bucket in buckets]
-        except Exception as e:
-            print(f"GCP Bucket Error: {e}")
-            return []
-
-    def get_gcs_files(self, credentials_dict, bucket_name):
-        """Lists files (blobs) in a specific GCS bucket."""
-        if not GCS_AVAILABLE: return []
-        try:
-            credentials = gcp_service_account.Credentials.from_service_account_info(credentials_dict)
-            storage_client = storage.Client(credentials=credentials, project=credentials_dict.get('project_id'))
-            
-            blobs = storage_client.list_blobs(bucket_name)
-            return [blob.name for blob in blobs]
-        except Exception as e:
-            print(f"GCP List Error: {e}")
-            return []
-
-    def download_gcs_file(self, credentials_dict, bucket_name, blob_name):
-        """Downloads a blob from GCS to memory."""
-        if not GCS_AVAILABLE: return b""
-        try:
-            credentials = gcp_service_account.Credentials.from_service_account_info(credentials_dict)
-            storage_client = storage.Client(credentials=credentials, project=credentials_dict.get('project_id'))
-            
-            bucket = storage_client.bucket(bucket_name)
-            blob = bucket.blob(blob_name)
-            return blob.download_as_bytes()
-        except Exception as e:
-            print(f"GCP Download Error: {e}")
-            return b""
-
-    # --- FILE READERS ---
-    def get_json_data(self, file_obj) -> pd.DataFrame:
-        data = json.load(file_obj)
-        flat = []
-        def recursive(d, path):
-            if isinstance(d, dict):
-                for k, v in d.items(): recursive(v, f"{path}.{k}" if path else k)
-            elif isinstance(d, list):
-                for i, v in enumerate(d): recursive(v, f"{path}[{i}]")
-            else: flat.append({"Path": path, "Value": str(d)})
-        recursive(data, "")
-        return pd.DataFrame(flat)
+        return self.drive_handler.download_file(file_id, mime_type, credentials_dict)  # NOTE(review): the removed inline version returned b"" on any failure; confirm DriveHandler.download_file keeps that contract
 
-    def get_parquet_data(self, file_bytes) -> pd.DataFrame:
-        if not PARQUET_AVAILABLE: return pd.DataFrame()
-        try:
-            return pd.read_parquet(io.BytesIO(file_bytes))
-        except: return pd.DataFrame()
-
-    def get_pdf_total_pages(self, file_bytes) -> int:
-        try:
-            doc = fitz.open(stream=file_bytes, filetype="pdf")
-            return len(doc)
-        except: return 0
+    def get_s3_buckets(self, a, s, r): return self.s3_handler.get_buckets(a, s, r)  # a, s, r: access key, secret key, region (names used by the removed inline version)
+    def get_s3_files(self, a, s, r, b): return self.s3_handler.get_files(a, s, r, b)  # b: bucket name
+    def download_s3_file(self, a, s, r, b, k): return self.s3_handler.download_file(a, s, r, b, k)  # k: object key within the bucket
     
-    def get_pdf_page_text(self, file_bytes, page_num):
+    def get_azure_containers(self, c): return self.azure_handler.get_containers(c)  # c: storage connection string (name used by the removed inline version)
+    def get_azure_blobs(self, c, n): return self.azure_handler.get_blobs(c, n)  # n: container name
+    def download_azure_blob(self, c, n, b): return self.azure_handler.download_blob(c, n, b)  # b: blob name
+
+    def get_gcs_buckets(self, c): return self.gcp_handler.get_buckets(c)  # c: service-account credentials dict (name used by the removed inline version)
+    def get_gcs_files(self, c, b): return self.gcp_handler.get_files(c, b)  # b: bucket name
+    def download_gcs_file(self, c, b, n): return self.gcp_handler.download_file(c, b, n)  # n: blob name
+
+    # --- NEW WRAPPERS FOR SLACK & CONFLUENCE ---
+    def get_slack_messages(self, token, channel_id):  # delegates to SlackHandler.fetch_messages
+        return self.slack_handler.fetch_messages(token, channel_id)
+
+    def get_confluence_page(self, url, username, token, page_id):  # delegates to ConfluenceHandler.fetch_page_content
+        return self.confluence_handler.fetch_page_content(url, username, token, page_id)
+
+    # --- MONGO (Still here) ---
+    def get_mongodb_data(self, host, port, db, user, pw, collection):
+        """Return up to 100 documents of ``db[collection]`` as a flattened DataFrame; empty when pymongo is missing or the query fails."""
+        if not MONGO_AVAILABLE: return pd.DataFrame()
         try:
-            doc = fitz.open(stream=file_bytes, filetype="pdf")
-            return doc[page_num].get_text("text")
-        except: return ""
\ No newline at end of file
+            auth = f"{quote_plus(user)}:{quote_plus(pw)}@" if user and pw else ""
+            with pymongo.MongoClient(f"mongodb://{auth}{host}:{port}/", serverSelectionTimeoutMS=5000) as client:  # closed on exit, even if the query raises
+                data = list(client[db][collection].find().limit(100))
+            if not data: return pd.DataFrame()
+            return pd.json_normalize([{**d, '_id': str(d.get('_id', ''))} for d in data])  # _id is stringified before flattening
+        except Exception as e:  # still best-effort, but the cause is reported instead of vanishing
+            print(f"Mongo Error: {e}")
+            return pd.DataFrame()
\ No newline at end of file
diff --git a/new_spacy b/classifier_manager/__init__.py
similarity index 100%
rename from new_spacy
rename to classifier_manager/__init__.py
diff --git a/classifier_manager/__pycache__/__init__.cpython-313.pyc b/classifier_manager/__pycache__/__init__.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..37c0ce4789cf3dfea9f767888bb820b087228630
Binary files /dev/null and b/classifier_manager/__pycache__/__init__.cpython-313.pyc differ
diff --git a/classifier_manager/__pycache__/gliner_model.cpython-313.pyc b/classifier_manager/__pycache__/gliner_model.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1a8c5072eaa0156456a7f30abffd4c31a126af10
Binary files /dev/null and b/classifier_manager/__pycache__/gliner_model.cpython-313.pyc differ
diff --git a/classifier_manager/__pycache__/inspector.cpython-313.pyc b/classifier_manager/__pycache__/inspector.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..227e751b4b1108087d773a9c3d3cada3614be3db
Binary files /dev/null and b/classifier_manager/__pycache__/inspector.cpython-313.pyc differ
diff --git a/classifier_manager/__pycache__/presidio_model.cpython-313.pyc b/classifier_manager/__pycache__/presidio_model.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a8cd78453b75b13684ac166555c0eb0b23bc7e90
Binary files /dev/null and b/classifier_manager/__pycache__/presidio_model.cpython-313.pyc differ
diff --git a/classifier_manager/__pycache__/regex_scanner.cpython-313.pyc b/classifier_manager/__pycache__/regex_scanner.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..798f53f8542f061dc7a4e874ea5c7f6938ea34af
Binary files /dev/null and b/classifier_manager/__pycache__/regex_scanner.cpython-313.pyc differ
diff --git a/classifier_manager/__pycache__/spacy_model.cpython-313.pyc b/classifier_manager/__pycache__/spacy_model.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..199c45a45d929bca3dcc62ebd48dd9c54324841c
Binary files /dev/null and b/classifier_manager/__pycache__/spacy_model.cpython-313.pyc differ
diff --git a/classifier_manager/gliner_model.py b/classifier_manager/gliner_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..f1b619f9fa73ebe1071b66689174f87670ac0617
--- /dev/null
+++ b/classifier_manager/gliner_model.py
@@ -0,0 +1,97 @@
+from gliner import GLiNER
+
+
+class PiiGlinerAnalyzer:
+    """PII detector backed by a GLiNER zero-shot NER model.
+
+    Detections are returned as plain dicts (label/text/start/end/score/source)
+    so they can be compared with the other scanners' output.
+    """
+
+    # Natural-language entity prompts passed to GLiNER on every scan.
+    DEFAULT_LABELS = (
+        "person",
+        "email",
+        "phone number",
+        "credit card",
+        "social security number",
+        "organization",
+        "location",
+        "date",
+        "ip address",
+        "passport number",
+        "driver license",
+    )
+
+    # GLiNER's lowercase labels -> the application's uppercase label keys.
+    # Built once at class level instead of on every scan() call.
+    LABEL_MAP = {
+        "person": "FIRST_NAME",
+        "phone number": "PHONE",
+        "social security number": "SSN",
+        "organization": "ORG",
+        "location": "LOCATION",
+        "ip address": "IP_ADDRESS",
+        "credit card": "CREDIT_CARD",
+        "email": "EMAIL",
+        "date": "DATE_TIME",
+        "passport number": "PASSPORT",
+        "driver license": "DRIVER_LICENSE",
+    }
+
+    def __init__(self, model_name="urchade/gliner_small-v2.1"):
+        """Load the GLiNER model named *model_name*.
+
+        A load failure is reported on stdout and leaves ``available`` False,
+        in which case ``scan`` returns no detections.
+        """
+        self.model = None
+        self.available = False
+        # Per-instance copy so callers can still edit ``labels`` freely.
+        self.labels = list(self.DEFAULT_LABELS)
+
+        try:
+            print(f"⏳ Loading GLiNER model: {model_name}...")
+            self.model = GLiNER.from_pretrained(model_name)
+            self.available = True
+            print("✅ GLiNER model loaded successfully.")
+        except Exception as e:
+            print(f"❌ Error loading GLiNER: {e}")
+
+    def scan(self, text: str, threshold: float = 0.5) -> list:
+        """Return GLiNER detections for *text* as a list of match dicts.
+
+        Args:
+            text: Text to scan; empty or whitespace-only text yields ``[]``.
+            threshold: Minimum score passed to ``predict_entities``.
+                Defaults to 0.5, the value previously hard-coded here.
+
+        Returns:
+            A list of dicts with ``label``, ``text``, ``start``, ``end``,
+            ``score`` and ``source`` keys. Any prediction error is printed
+            and results in an empty list.
+        """
+        if not self.available or not text or not text.strip():
+            return []
+
+        try:
+            entities = self.model.predict_entities(
+                text, self.labels, threshold=threshold
+            )
+            return [
+                {
+                    # Unmapped labels fall back to an UPPER_SNAKE form.
+                    "label": self.LABEL_MAP.get(
+                        ent["label"], ent["label"].upper().replace(" ", "_")
+                    ),
+                    "text": ent["text"],
+                    "start": ent["start"],
+                    "end": ent["end"],
+                    "score": ent["score"],
+                    "source": "GLiNER",
+                }
+                for ent in entities
+            ]
+        except Exception as e:
+            print(f"⚠️ GLiNER Scan Error: {e}")
+            return []
\ No newline at end of file
diff --git a/inspector.py b/classifier_manager/inspector.py
similarity index 68%
rename from inspector.py
rename to classifier_manager/inspector.py
index 94aedf65de9dc7a987ddf9cb3dde195be5155c14..bb92462bb0aaaf0fae8bc540ed08de35830fb0d5 100644
--- a/inspector.py
+++ b/classifier_manager/inspector.py
@@ -12,16 +12,17 @@ class ModelInspector:
             "end": match["end"]
         }
 
-    def compare_models(self, regex_matches, nltk_matches, spacy_matches, presidio_matches):
+    def compare_models(self, regex_matches, nltk_matches, spacy_matches, presidio_matches, gliner_matches):
         """
-        Compares 4 lists of matches to find Unique vs Missed PII.
+        Compares 5 lists of matches to find Unique vs Missed PII.
+        Added GLiNER to the comparison logic.
         """
         all_detections = {}
         
         def add_to_master(matches, model_name):
             found_set = set()
             for m in matches:
-                # Use tuple key for uniqueness
+                # Use tuple key for uniqueness: (start, end, text)
                 key = (m['start'], m['end'], m['text']) 
                 if key not in all_detections:
                     all_detections[key] = {'text': m['text'], 'label': m['label']}
@@ -32,19 +33,26 @@ class ModelInspector:
         regex_set = add_to_master(regex_matches, "Regex")
         nltk_set = add_to_master(nltk_matches, "NLTK")
         spacy_set = add_to_master(spacy_matches, "SpaCy")
-        presidio_set = add_to_master(presidio_matches, "Presidio") # <--- Added Presidio
+        presidio_set = add_to_master(presidio_matches, "Presidio")
+        gliner_set = add_to_master(gliner_matches, "GLiNER") # <--- Added GLiNER
 
-        # 2. Calculate "Missed" Data
+        # 2. Calculate "Missed" Data (Union of all models)
         total_unique_pii = set(all_detections.keys())
         
         regex_missed = total_unique_pii - regex_set
         nltk_missed = total_unique_pii - nltk_set
         spacy_missed = total_unique_pii - spacy_set
-        presidio_missed = total_unique_pii - presidio_set # <--- Added Presidio
+        presidio_missed = total_unique_pii - presidio_set
+        gliner_missed = total_unique_pii - gliner_set # <--- Added GLiNER
 
         def fmt(item_set):
             items = [all_detections[k]['text'] for k in item_set]
-            return ", ".join(items) if items else "None"
+            # Limiting to first 5 items to prevent UI clutter if list is huge
+            display_items = items[:5]
+            res = ", ".join(display_items)
+            if len(items) > 5:
+                res += f", (+{len(items)-5} more)"
+            return res if res else "None"
 
         total_count = len(total_unique_pii) if len(total_unique_pii) > 0 else 1
         
@@ -76,7 +84,15 @@ class ModelInspector:
                 "Missed PII": fmt(presidio_missed),
                 "Accuracy": len(presidio_set) / total_count,
                 "Count": len(presidio_set)
+            },
+            {
+                "Model": "🦅 GLiNER",
+                "Detected PII": fmt(gliner_set),
+                "Missed PII": fmt(gliner_missed),
+                "Accuracy": len(gliner_set) / total_count,
+                "Count": len(gliner_set)
             }
         ]
 
-        return pd.DataFrame(stats)
\ No newline at end of file
+        # Return sorted by Accuracy descending so best model is on top
+        return pd.DataFrame(stats).sort_values(by="Accuracy", ascending=False)
\ No newline at end of file
diff --git a/presidio_model.py b/classifier_manager/presidio_model.py
similarity index 100%
rename from presidio_model.py
rename to classifier_manager/presidio_model.py
diff --git a/classifier_manager/regex_scanner.py b/classifier_manager/regex_scanner.py
new file mode 100644
index 0000000000000000000000000000000000000000..29c57051f728c27f28591a28c3859f69fd021c18
--- /dev/null
+++ b/classifier_manager/regex_scanner.py
@@ -0,0 +1,63 @@
+import re
+from typing import Dict, List
+
+
+class RegexScanner:
+    """Regex-based PII scanner with an editable pattern table."""
+
+    def __init__(self):
+        # Highlight colour per label; not read by this class itself.
+        self.colors = {
+            "EMAIL": "#8ef", "FIRST_NAME": "#af9", "LAST_NAME": "#af9",
+            "PHONE": "#faa", "SSN": "#fca", "CREDIT_CARD": "#fea",
+            "LOCATION": "#dcf", "ORG": "#ffecb3", "DEFAULT": "#e0e0e0"
+        }
+
+        # Label -> regex source string used by scan().
+        self.patterns: Dict[str, str] = {
+            "EMAIL": r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b",
+            "PHONE": r"\b(?:\+?1[-. ]?)?\(?([0-9]{3})\)?[-. ]?([0-9]{3})[-. ]?([0-9]{4})\b",
+            "SSN": r"\b\d{3}-\d{2}-\d{4}\b",
+            "CREDIT_CARD": r"\b\d{4}[- ]?\d{4}[- ]?\d{4}[- ]?\d{4}\b",
+            "AADHAAR_IND": r"\b\d{4}[ -]?\d{4}[ -]?\d{4}\b",
+            "PAN_IND": r"\b[A-Z]{5}\d{4}[A-Z]{1}\b",
+        }
+
+    def add_pattern(self, name, regex):
+        """Register *regex* under the upper-cased *name*, replacing any existing entry."""
+        self.patterns[name.upper()] = regex
+
+    def remove_pattern(self, name):
+        """Drop the pattern stored under the upper-cased *name*, if present."""
+        self.patterns.pop(name.upper(), None)
+
+    def scan(self, text: str) -> List[dict]:
+        """Return every regex match found in *text*.
+
+        Empty or non-string input yields ``[]`` instead of raising from
+        ``re.finditer``. Patterns that fail to compile are skipped.
+
+        Returns:
+            A list of dicts with ``label``, ``text``, ``start``, ``end`` and
+            ``source`` keys, grouped by pattern in table order.
+        """
+        if not isinstance(text, str) or not text:
+            return []
+
+        matches = []
+        for label, regex in self.patterns.items():
+            try:
+                found = re.finditer(regex, text)
+            except re.error:
+                continue  # Skip invalid user-defined regex
+            matches.extend(
+                {
+                    "label": label,
+                    "text": m.group(),
+                    "start": m.start(),
+                    "end": m.end(),
+                    "source": "Regex",
+                }
+                for m in found
+            )
+        return matches
\ No newline at end of file
diff --git a/Spacy_model.py b/classifier_manager/spacy_model.py
similarity index 100%
rename from Spacy_model.py
rename to classifier_manager/spacy_model.py
diff --git a/connectors/__init__.py b/connectors/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/connectors/__pycache__/__init__.cpython-313.pyc b/connectors/__pycache__/__init__.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b38b9e7c94485879719cf71a01398327eefa3bfe
Binary files /dev/null and b/connectors/__pycache__/__init__.cpython-313.pyc differ
diff --git a/connectors/__pycache__/aws_s3_handler.cpython-313.pyc b/connectors/__pycache__/aws_s3_handler.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7a0ba91b85fd4225efb93b038d63c0c7ab686711
Binary files /dev/null and b/connectors/__pycache__/aws_s3_handler.cpython-313.pyc differ
diff --git a/connectors/__pycache__/azure_handler.cpython-313.pyc b/connectors/__pycache__/azure_handler.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..46b29c428a5a7a887659a308d8812500634222bb
Binary files /dev/null and b/connectors/__pycache__/azure_handler.cpython-313.pyc differ
diff --git a/connectors/__pycache__/confluence_handler.cpython-313.pyc b/connectors/__pycache__/confluence_handler.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8664b74053e6e241cf74b56fb78906a7e26a7b66
Binary files /dev/null and b/connectors/__pycache__/confluence_handler.cpython-313.pyc differ
diff --git a/connectors/__pycache__/drive_handler.cpython-313.pyc b/connectors/__pycache__/drive_handler.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a8e1e416719275f8ade5a73ad44037bf9ab0643c
Binary files /dev/null and b/connectors/__pycache__/drive_handler.cpython-313.pyc differ
diff --git a/connectors/__pycache__/gcp_storage_handler.cpython-313.pyc b/connectors/__pycache__/gcp_storage_handler.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..71cbe4098902a2813f48eedf0331abc2a97d67e7
Binary files /dev/null and b/connectors/__pycache__/gcp_storage_handler.cpython-313.pyc differ
diff --git a/connectors/__pycache__/gmail_handler.cpython-313.pyc b/connectors/__pycache__/gmail_handler.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6bd9a866bb052a5868def12211796844d47e8e57
Binary files /dev/null and b/connectors/__pycache__/gmail_handler.cpython-313.pyc differ
diff --git a/connectors/__pycache__/mongo_handler.cpython-313.pyc b/connectors/__pycache__/mongo_handler.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5a33fc41fbb0cfd3e3d280a7b1281f8542fbf37b
Binary files /dev/null and b/connectors/__pycache__/mongo_handler.cpython-313.pyc differ
diff --git a/connectors/__pycache__/mysql_handler.cpython-313.pyc b/connectors/__pycache__/mysql_handler.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..64ab13b7cea1fd487bca207d3dfadf64795987de
Binary files /dev/null and b/connectors/__pycache__/mysql_handler.cpython-313.pyc differ
diff --git a/connectors/__pycache__/postgres_handler.cpython-313.pyc b/connectors/__pycache__/postgres_handler.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..782d07d168ae050abbb0ecca5bb7d2b41946ae11
Binary files /dev/null and b/connectors/__pycache__/postgres_handler.cpython-313.pyc differ
diff --git a/connectors/__pycache__/slack_handler.cpython-313.pyc b/connectors/__pycache__/slack_handler.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..72728236c1cd9918b49338adc2c876b5b53a1c5d
Binary files /dev/null and b/connectors/__pycache__/slack_handler.cpython-313.pyc differ
diff --git a/connectors/aws_s3_handler.py b/connectors/aws_s3_handler.py
new file mode 100644
index 0000000000000000000000000000000000000000..4134a4461f31aad00bf88fb54c512713a10338d6
--- /dev/null
+++ b/connectors/aws_s3_handler.py
@@ -0,0 +1,56 @@
+import boto3
+import io
+
+
+class S3Handler:
+    """Thin wrapper over the boto3 S3 client for listing and downloading objects."""
+
+    def __init__(self):
+        print("✅ AWS S3 Handler loaded.")
+
+    @staticmethod
+    def _client(access_key, secret_key, region):
+        """Build an S3 client from explicit credentials."""
+        return boto3.client(
+            's3',
+            aws_access_key_id=access_key,
+            aws_secret_access_key=secret_key,
+            region_name=region,
+        )
+
+    def get_buckets(self, access_key, secret_key, region):
+        """Return the bucket names visible to the credentials, or ``[]`` on error."""
+        try:
+            response = self._client(access_key, secret_key, region).list_buckets()
+            return [b['Name'] for b in response.get('Buckets', [])]
+        except Exception as e:
+            print(f"❌ S3 Error: {e}")
+            return []
+
+    def get_files(self, access_key, secret_key, region, bucket_name):
+        """Return every object key in *bucket_name*, or ``[]`` on error.
+
+        ``list_objects_v2`` returns at most 1000 keys per call, so the
+        listing is paginated to avoid silently truncating large buckets.
+        """
+        try:
+            s3 = self._client(access_key, secret_key, region)
+            paginator = s3.get_paginator('list_objects_v2')
+            keys = []
+            for page in paginator.paginate(Bucket=bucket_name):
+                keys.extend(obj['Key'] for obj in page.get('Contents', []))
+            return keys
+        except Exception as e:
+            # Previously swallowed silently; report like the sibling methods.
+            print(f"❌ S3 List Error: {e}")
+            return []
+
+    def download_file(self, access_key, secret_key, region, bucket_name, file_key):
+        """Return the object's bytes, or ``b""`` on error."""
+        try:
+            s3 = self._client(access_key, secret_key, region)
+            obj = s3.get_object(Bucket=bucket_name, Key=file_key)
+            return obj['Body'].read()
+        except Exception as e:
+            print(f"❌ S3 Download Error: {e}")
+            return b""
\ No newline at end of file
diff --git a/connectors/azure_handler.py b/connectors/azure_handler.py
new file mode 100644
index 0000000000000000000000000000000000000000..f164be2651a3101e2eba9ffc0dc4309000189ca0
--- /dev/null
+++ b/connectors/azure_handler.py
@@ -0,0 +1,40 @@
+from azure.storage.blob import BlobServiceClient
+
+
+class AzureBlobHandler:
+    """Lists and downloads Azure Blob Storage content via a connection string."""
+
+    def __init__(self):
+        print("✅ Azure Blob Handler loaded.")
+
+    def get_containers(self, conn_str):
+        """Return the container names in the account, or ``[]`` on error."""
+        try:
+            blob_service_client = BlobServiceClient.from_connection_string(conn_str)
+            containers = blob_service_client.list_containers()
+            return [c['name'] for c in containers]
+        except Exception as e:
+            print(f"❌ Azure Error: {e}")
+            return []
+
+    def get_blobs(self, conn_str, container_name):
+        """Return the blob names in *container_name*, or ``[]`` on error."""
+        try:
+            blob_service_client = BlobServiceClient.from_connection_string(conn_str)
+            container_client = blob_service_client.get_container_client(container_name)
+            blobs = container_client.list_blobs()
+            return [b['name'] for b in blobs]
+        except Exception as e:
+            # Previously swallowed silently; report like the sibling methods.
+            print(f"❌ Azure List Error: {e}")
+            return []
+
+    def download_blob(self, conn_str, container_name, blob_name):
+        """Return the blob's bytes, or ``b""`` on error."""
+        try:
+            blob_service_client = BlobServiceClient.from_connection_string(conn_str)
+            blob_client = blob_service_client.get_blob_client(container=container_name, blob=blob_name)
+            return blob_client.download_blob().readall()
+        except Exception as e:
+            print(f"❌ Azure Download Error: {e}")
+            return b""
\ No newline at end of file
diff --git a/connectors/confluence_handler.py b/connectors/confluence_handler.py
new file mode 100644
index 0000000000000000000000000000000000000000..8411668b19e88b54f65794104f01c70346a241c6
--- /dev/null
+++ b/connectors/confluence_handler.py
@@ -0,0 +1,42 @@
+import pandas as pd
+from atlassian import Confluence
+from bs4 import BeautifulSoup
+
+
+class ConfluenceHandler:
+    """Pulls a single Confluence page and exposes it as a one-row DataFrame."""
+
+    def __init__(self):
+        print("✅ Confluence Handler loaded.")
+
+    def fetch_page_content(self, url, username, api_token, page_id):
+        """Return the page's title and tag-stripped body as a one-row DataFrame.
+
+        Columns are ``Source``, ``Sender``, ``Subject`` and ``Content``. Any
+        failure is printed and an empty DataFrame is returned instead.
+        """
+        try:
+            api = Confluence(url=url, username=username, password=api_token, cloud=True)
+            page = api.get_page_by_id(page_id, expand='body.storage')
+
+            subject = page.get('title', 'Unknown Title')
+            storage = page.get('body', {}).get('storage', {})
+            markup = storage.get('value', '')
+
+            # Strip the storage-format markup so only plain text is scanned.
+            plain = (
+                BeautifulSoup(markup, "html.parser").get_text(separator=' ')
+                if markup
+                else ""
+            )
+
+            row = {
+                "Source": "Confluence",
+                "Sender": username,
+                "Subject": subject,
+                "Content": plain,
+            }
+            return pd.DataFrame([row])
+        except Exception as err:
+            print(f"❌ Confluence Error: {err}")
+            return pd.DataFrame()
\ No newline at end of file
diff --git a/connectors/drive_handler.py b/connectors/drive_handler.py
new file mode 100644
index 0000000000000000000000000000000000000000..881b31539d5cc52258cf9ed84aa0843ae6769296
--- /dev/null
+++ b/connectors/drive_handler.py
@@ -0,0 +1,79 @@
+import io
+import json
+from googleapiclient.discovery import build
+from googleapiclient.http import MediaIoBaseDownload
+from google.oauth2 import service_account
+
+
+class DriveHandler:
+    """Lists and downloads Google Drive files using a service account."""
+
+    _SCOPES = ['https://www.googleapis.com/auth/drive.readonly']
+
+    # Only Google-native files can be exported; their MIME types share this prefix.
+    _NATIVE_PREFIX = "application/vnd.google-apps."
+
+    # Native file kind -> MIME type requested from export_media.
+    _EXPORT_FORMATS = {
+        "spreadsheet": "text/csv",
+        "document": "application/pdf",
+        "presentation": "application/pdf",
+    }
+
+    def __init__(self):
+        print("✅ Google Drive Handler loaded.")
+
+    def _build_service(self, credentials_dict):
+        """Return a read-only Drive v3 service for the service-account info."""
+        creds = service_account.Credentials.from_service_account_info(
+            credentials_dict, scopes=self._SCOPES
+        )
+        return build('drive', 'v3', credentials=creds)
+
+    def _export_format(self, mime_type):
+        """Return the export MIME type for a Google-native file, else ``None``."""
+        if not mime_type or not mime_type.startswith(self._NATIVE_PREFIX):
+            return None
+        for kind, export_mime in self._EXPORT_FORMATS.items():
+            if kind in mime_type:
+                return export_mime
+        return None
+
+    def list_files(self, credentials_dict):
+        """Return one page (pageSize=15) of id/name/mimeType records, or ``[]`` on error."""
+        try:
+            service = self._build_service(credentials_dict)
+            results = service.files().list(
+                pageSize=15, fields="files(id, name, mimeType)"
+            ).execute()
+            return results.get('files', [])
+        except Exception as e:
+            print(f"❌ Drive List Error: {e}")
+            return []
+
+    def download_file(self, file_id, mime_type, credentials_dict) -> bytes:
+        """Return the file's bytes, or ``b""`` on error.
+
+        Google Sheets are exported as CSV and Docs/Slides as PDF. Everything
+        else is downloaded as-is, including uploaded Office files whose MIME
+        types merely contain "document" or "spreadsheet"; those used to be
+        routed to ``export_media``, which is only for Google-native files.
+        """
+        try:
+            service = self._build_service(credentials_dict)
+            export_mime = self._export_format(mime_type)
+            if export_mime:
+                request = service.files().export_media(fileId=file_id, mimeType=export_mime)
+            else:
+                request = service.files().get_media(fileId=file_id)
+
+            fh = io.BytesIO()
+            downloader = MediaIoBaseDownload(fh, request)
+            done = False
+            while not done:
+                _, done = downloader.next_chunk()
+
+            return fh.getvalue()
+        except Exception as e:
+            print(f"❌ Drive Download Error: {e}")
+            return b""
\ No newline at end of file
diff --git a/connectors/gcp_storage_handler.py b/connectors/gcp_storage_handler.py
new file mode 100644
index 0000000000000000000000000000000000000000..4de70fb00368aa8d00811cc5abea4cece40e056b
--- /dev/null
+++ b/connectors/gcp_storage_handler.py
@@ -0,0 +1,42 @@
+from google.cloud import storage
+from google.oauth2 import service_account
+
+
+class GcpStorageHandler:
+    """Service-account access to Google Cloud Storage buckets and blobs."""
+
+    def __init__(self):
+        print("✅ GCP Storage Handler loaded.")
+
+    @staticmethod
+    def _connect(credentials_dict):
+        """Create a storage client from service-account info."""
+        creds = service_account.Credentials.from_service_account_info(credentials_dict)
+        return storage.Client(credentials=creds, project=credentials_dict.get('project_id'))
+
+    def get_buckets(self, credentials_dict):
+        """Return all bucket names for the project, or ``[]`` on error."""
+        try:
+            client = self._connect(credentials_dict)
+            return [item.name for item in client.list_buckets()]
+        except Exception as err:
+            print(f"❌ GCP Bucket Error: {err}")
+            return []
+
+    def get_files(self, credentials_dict, bucket_name):
+        """Return all blob names in *bucket_name*, or ``[]`` on error."""
+        try:
+            client = self._connect(credentials_dict)
+            return [item.name for item in client.list_blobs(bucket_name)]
+        except Exception as err:
+            print(f"❌ GCP List Error: {err}")
+            return []
+
+    def download_file(self, credentials_dict, bucket_name, blob_name):
+        """Return the blob's bytes, or ``b""`` on error."""
+        try:
+            client = self._connect(credentials_dict)
+            return client.bucket(bucket_name).blob(blob_name).download_as_bytes()
+        except Exception as err:
+            print(f"❌ GCP Download Error: {err}")
+            return b""
\ No newline at end of file
diff --git a/connectors/gmail_handler.py b/connectors/gmail_handler.py
new file mode 100644
index 0000000000000000000000000000000000000000..7b845f3aea90a5e5aed00da58fa5bc25b21b5a2f
--- /dev/null
+++ b/connectors/gmail_handler.py
@@ -0,0 +1,113 @@
+import base64
+import os
+import pickle
+import pandas as pd
+from bs4 import BeautifulSoup
+from googleapiclient.discovery import build
+from google_auth_oauthlib.flow import InstalledAppFlow
+from google.auth.transport.requests import Request
+
+
+class GmailHandler:
+    """Fetches recent Gmail messages through the OAuth installed-app flow."""
+
+    _SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']
+    _TOKEN_PATH = 'token.pickle'
+    _SECRET_PATH = 'temp_client_secret.json'
+
+    def __init__(self):
+        print("✅ Gmail Handler loaded.")
+
+    def _load_credentials(self, credentials_file):
+        """Return valid credentials, reusing the cached token when possible."""
+        creds = None
+        if os.path.exists(self._TOKEN_PATH):
+            # NOTE(security): unpickling runs arbitrary code if token.pickle
+            # is ever replaced by an attacker; keep the file private.
+            with open(self._TOKEN_PATH, 'rb') as token:
+                creds = pickle.load(token)
+
+        if creds and creds.valid:
+            return creds
+
+        if creds and creds.expired and creds.refresh_token:
+            creds.refresh(Request())
+            return creds
+
+        # The flow needs a file path, so spill the uploaded secret to disk
+        # and always remove it again, even when the flow raises.
+        try:
+            with open(self._SECRET_PATH, "wb") as f:
+                f.write(credentials_file.getvalue())
+            flow = InstalledAppFlow.from_client_secrets_file(self._SECRET_PATH, self._SCOPES)
+            creds = flow.run_local_server(port=0)
+        finally:
+            if os.path.exists(self._SECRET_PATH):
+                os.remove(self._SECRET_PATH)
+
+        with open(self._TOKEN_PATH, 'wb') as token:
+            pickle.dump(creds, token)
+        return creds
+
+    @staticmethod
+    def _decode(data):
+        """Decode a base64url body; undecodable bytes are replaced, not fatal."""
+        return base64.urlsafe_b64decode(data).decode("utf-8", errors="replace")
+
+    def _plain_text(self, parts):
+        """Concatenate every text/plain part, descending into nested multiparts."""
+        chunks = []
+        for part in parts:
+            if part.get('parts'):
+                chunks.append(self._plain_text(part['parts']))
+            elif part.get('mimeType') == 'text/plain' and 'data' in part.get('body', {}):
+                chunks.append(self._decode(part['body']['data']))
+        return "".join(chunks)
+
+    def fetch_emails(self, credentials_file, num_emails=10) -> pd.DataFrame:
+        """Return up to *num_emails* messages as a DataFrame.
+
+        Args:
+            credentials_file: Object whose ``getvalue()`` returns the OAuth
+                client-secret JSON bytes.
+            num_emails: Passed to the API as ``maxResults``.
+
+        Returns:
+            A DataFrame with ``Source``, ``Sender``, ``Subject`` and
+            ``Content`` columns; empty when anything fails.
+        """
+        try:
+            creds = self._load_credentials(credentials_file)
+            service = build('gmail', 'v1', credentials=creds)
+            results = service.users().messages().list(userId='me', maxResults=num_emails).execute()
+            messages = results.get('messages', [])
+
+            email_data = []
+            for message in messages:
+                msg = service.users().messages().get(userId='me', id=message['id']).execute()
+                payload = msg['payload']
+                # Guard against a missing headers list so the lookups below cannot raise.
+                headers = payload.get("headers") or []
+
+                subject = next((h['value'] for h in headers if h['name'] == 'Subject'), "No Subject")
+                sender = next((h['value'] for h in headers if h['name'] == 'From'), "Unknown")
+
+                body = ""
+                if 'parts' in payload:
+                    body = self._plain_text(payload['parts'])
+                elif 'body' in payload and 'data' in payload['body']:
+                    body = self._decode(payload['body']['data'])
+
+                clean_body = BeautifulSoup(body, "html.parser").get_text()
+                email_data.append({
+                    "Source": "Gmail",
+                    "Sender": sender,
+                    "Subject": subject,
+                    "Content": f"Subject: {subject}\n\n{clean_body}"
+                })
+
+            return pd.DataFrame(email_data)
+
+        except Exception as e:
+            print(f"❌ Gmail Error: {e}")
+            return pd.DataFrame()
\ No newline at end of file
diff --git a/connectors/mongo_handler.py b/connectors/mongo_handler.py
new file mode 100644
index 0000000000000000000000000000000000000000..f35430acfb15c44ba272024780af5160e3c616af
--- /dev/null
+++ b/connectors/mongo_handler.py
@@ -0,0 +1,59 @@
+import pandas as pd
+from urllib.parse import quote_plus
+
+
+class MongoHandler:
+    """Reads a sample of documents from a MongoDB collection."""
+
+    def __init__(self):
+        # Imported here so a missing pymongo does not raise at construction.
+        try:
+            import pymongo
+            self.pymongo = pymongo
+            print("✅ MongoDB Handler loaded.")
+        except ImportError:
+            self.pymongo = None
+            print("❌ PyMongo not installed.")
+
+    def fetch_data(self, host, port, db, user, pw, collection):
+        """Return up to 100 documents from *collection* as a flattened DataFrame.
+
+        Credentials are only put in the URI when both *user* and *pw* are
+        given. Returns an empty DataFrame when pymongo is missing, the
+        collection is empty, or any error occurs.
+        """
+        if not self.pymongo:
+            return pd.DataFrame()
+
+        client = None
+        try:
+            if user and pw:
+                safe_user = quote_plus(user)
+                safe_pw = quote_plus(pw)
+                uri = f"mongodb://{safe_user}:{safe_pw}@{host}:{port}/"
+            else:
+                uri = f"mongodb://{host}:{port}/"
+
+            client = self.pymongo.MongoClient(uri, serverSelectionTimeoutMS=5000)
+            # Fail fast if the server is unreachable.
+            client.server_info()
+
+            data = list(client[db][collection].find().limit(100))
+            if not data:
+                return pd.DataFrame()
+
+            # Stringify the '_id' values before building the frame.
+            for d in data:
+                if '_id' in d:
+                    d['_id'] = str(d['_id'])
+
+            return pd.json_normalize(data)
+
+        except Exception as e:
+            print(f"❌ Mongo Error: {e}")
+            return pd.DataFrame()
+        finally:
+            # Each call opens its own client; close it so connections
+            # are not leaked across repeated fetches.
+            if client is not None:
+                client.close()
\ No newline at end of file
diff --git a/connectors/mysql_handler.py b/connectors/mysql_handler.py
new file mode 100644
index 0000000000000000000000000000000000000000..8dfc0785fbd28a54dcf6192fadeb9a3c79d13679
--- /dev/null
+++ b/connectors/mysql_handler.py
@@ -0,0 +1,46 @@
+import re
+import pandas as pd
+from sqlalchemy import create_engine
+from urllib.parse import quote_plus
+
+
+class MysqlHandler:
+    """Reads a sample of rows from a MySQL table into a DataFrame."""
+
+    # Accepts ``table`` or ``schema.table``; each part is either a bare
+    # identifier or an already backtick-quoted one. Anything else (spaces,
+    # semicolons, comments) is rejected before it reaches the SQL string.
+    _PART = r"(?:[A-Za-z0-9_$]+|`[^`]+`)"
+    _TABLE_RE = re.compile(rf"{_PART}(?:\.{_PART})?")
+
+    def __init__(self):
+        print("✅ MySQL Handler loaded.")
+
+    def fetch_data(self, host, port, db, user, pw, table):
+        """Return the first 100 rows of *table*, or an empty DataFrame on error.
+
+        The table name is interpolated into the query, so it is validated
+        first; an invalid name is reported like any other error. Both user
+        and password are URL-quoted so special characters cannot corrupt
+        the connection string.
+        """
+        engine = None
+        try:
+            if not isinstance(table, str) or not self._TABLE_RE.fullmatch(table):
+                raise ValueError(f"invalid table name: {table!r}")
+
+            safe_user = quote_plus(user)
+            safe_pw = quote_plus(pw)
+            # Uses mysql+pymysql driver
+            conn_str = f"mysql+pymysql://{safe_user}:{safe_pw}@{host}:{port}/{db}"
+            engine = create_engine(conn_str)
+
+            query = f"SELECT * FROM {table} LIMIT 100"
+            return pd.read_sql(query, engine)
+        except Exception as e:
+            print(f"❌ MySQL Error: {e}")
+            return pd.DataFrame()
+        finally:
+            # Release pooled connections; a new engine is built per call.
+            if engine is not None:
+                engine.dispose()
\ No newline at end of file
diff --git a/connectors/postgres_handler.py b/connectors/postgres_handler.py
new file mode 100644
index 0000000000000000000000000000000000000000..69244b4c779e0c3d50c059243ca7a247cc79cba8
--- /dev/null
+++ b/connectors/postgres_handler.py
@@ -0,0 +1,46 @@
+import re
+import pandas as pd
+from sqlalchemy import create_engine
+from urllib.parse import quote_plus
+
+
+class PostgresHandler:
+    """Reads a sample of rows from a PostgreSQL table into a DataFrame."""
+
+    # Accepts ``table`` or ``schema.table``; each part is either a bare
+    # identifier or an already double-quoted one. Anything else (spaces,
+    # semicolons, comments) is rejected before it reaches the SQL string.
+    _PART = r'(?:[A-Za-z_][A-Za-z0-9_$]*|"[^"]+")'
+    _TABLE_RE = re.compile(rf"{_PART}(?:\.{_PART})?")
+
+    def __init__(self):
+        print("✅ PostgreSQL Handler loaded.")
+
+    def fetch_data(self, host, port, db, user, pw, table):
+        """Return the first 100 rows of *table*, or an empty DataFrame on error.
+
+        The table name is interpolated into the query, so it is validated
+        first; an invalid name is reported like any other error. Both user
+        and password are URL-quoted so special characters cannot corrupt
+        the connection string.
+        """
+        engine = None
+        try:
+            if not isinstance(table, str) or not self._TABLE_RE.fullmatch(table):
+                raise ValueError(f"invalid table name: {table!r}")
+
+            safe_user = quote_plus(user)
+            safe_pw = quote_plus(pw)
+            # SQLAlchemy connection string
+            conn_str = f"postgresql://{safe_user}:{safe_pw}@{host}:{port}/{db}"
+            engine = create_engine(conn_str)
+
+            query = f"SELECT * FROM {table} LIMIT 100"
+            return pd.read_sql(query, engine)
+        except Exception as e:
+            print(f"❌ PostgreSQL Error: {e}")
+            return pd.DataFrame()
+        finally:
+            # Release pooled connections; a new engine is built per call.
+            if engine is not None:
+                engine.dispose()
\ No newline at end of file
diff --git a/connectors/slack_handler.py b/connectors/slack_handler.py
new file mode 100644
index 0000000000000000000000000000000000000000..7e73b982283e32d659dfd4aa7433f935bce21772
--- /dev/null
+++ b/connectors/slack_handler.py
@@ -0,0 +1,50 @@
+import pandas as pd
+from slack_sdk import WebClient
+from slack_sdk.errors import SlackApiError
+import datetime
+
+
+class SlackHandler:
+    """Reads recent channel history from Slack into a DataFrame."""
+
+    def __init__(self):
+        print("✅ Slack Handler loaded.")
+
+    def fetch_messages(self, token, channel_id, num_messages=20):
+        """Return recent messages of *channel_id* as a DataFrame.
+
+        Entries carrying a ``subtype`` are skipped. An empty DataFrame is
+        returned when nothing remains or when any error occurs.
+        """
+        try:
+            history = WebClient(token=token).conversations_history(
+                channel=channel_id, limit=num_messages
+            )
+            raw = history['messages'] if history['ok'] else []
+
+            rows = []
+            for entry in raw:
+                if 'subtype' in entry:
+                    continue
+                sender = entry.get('user', 'Unknown')
+                content = entry.get('text', '')
+                posted = datetime.datetime.fromtimestamp(float(entry.get('ts', 0)))
+                when = posted.strftime('%Y-%m-%d %H:%M:%S')
+                rows.append({
+                    "Source": "Slack",
+                    "Sender": sender,
+                    "Subject": f"Message in {channel_id} at {when}",
+                    "Content": content,
+                })
+
+            if rows:
+                return pd.DataFrame(rows)
+            print("⚠️ No messages found in channel.")
+            return pd.DataFrame()
+
+        except SlackApiError as err:
+            print(f"❌ Slack API Error: {err.response['error']}")
+            return pd.DataFrame()
+        except Exception as err:
+            print(f"❌ Slack Handler Error: {err}")
+            return pd.DataFrame()
\ No newline at end of file
diff --git a/file_handlers/__init__.py b/file_handlers/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/file_handlers/__pycache__/__init__.cpython-313.pyc b/file_handlers/__pycache__/__init__.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f8442ed10e38a6901629379f42b944c5831236d2
Binary files /dev/null and b/file_handlers/__pycache__/__init__.cpython-313.pyc differ
diff --git a/file_handlers/__pycache__/avro_handler.cpython-313.pyc b/file_handlers/__pycache__/avro_handler.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..adfe8a82a5af893d5bf425fd0b299614c6f457d9
Binary files /dev/null and b/file_handlers/__pycache__/avro_handler.cpython-313.pyc differ
diff --git a/file_handlers/__pycache__/json_handler.cpython-313.pyc b/file_handlers/__pycache__/json_handler.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..337a40ed119538367062e58aa8063fa5e4bbbfa4
Binary files /dev/null and b/file_handlers/__pycache__/json_handler.cpython-313.pyc differ
diff --git a/file_handlers/__pycache__/ocr_engine.cpython-313.pyc b/file_handlers/__pycache__/ocr_engine.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9c1463f41999a9ecbd01865f527dcd3086c254ff
Binary files /dev/null and b/file_handlers/__pycache__/ocr_engine.cpython-313.pyc differ
diff --git a/file_handlers/__pycache__/parquet_handler.cpython-313.pyc b/file_handlers/__pycache__/parquet_handler.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ea190a2ad66cc6721e6971b5fed7e3a03a93054d
Binary files /dev/null and b/file_handlers/__pycache__/parquet_handler.cpython-313.pyc differ
diff --git a/file_handlers/__pycache__/pdf_handler.cpython-313.pyc b/file_handlers/__pycache__/pdf_handler.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c9607b9dd3b2af360d05ab2a1fc407a1f035245c
Binary files /dev/null and b/file_handlers/__pycache__/pdf_handler.cpython-313.pyc differ
diff --git a/file_handlers/avro_handler.py b/file_handlers/avro_handler.py
new file mode 100644
index 0000000000000000000000000000000000000000..14bc33418b8340c8a7e382f5b231d29cf3e529bc
--- /dev/null
+++ b/file_handlers/avro_handler.py
@@ -0,0 +1,44 @@
+# avro_handler.py
+import io
+import pandas as pd
+
+
+class AvroHandler:
+    """Convert Apache Avro container files into pandas DataFrames.
+
+    ``fastavro`` is imported lazily so the application still starts when the
+    optional dependency is missing; ``available`` records whether it loaded.
+    """
+
+    def __init__(self):
+        self.available = False
+        # Always define the attribute so it exists even when the import fails.
+        self.fastavro = None
+        try:
+            import fastavro
+            self.fastavro = fastavro
+            self.available = True
+            print("✅ Avro Handler loaded.")
+        except ImportError:
+            print("❌ fastavro not found. Please run: pip install fastavro")
+
+    def convert_to_dataframe(self, file_bytes: bytes) -> pd.DataFrame:
+        """Read Avro bytes and convert them to a pandas DataFrame.
+
+        :param file_bytes: Raw contents of an Avro container file.
+        :return: One row per Avro record. An empty DataFrame is returned when
+            fastavro is unavailable, the input is empty, or parsing fails.
+        """
+        if not self.available or not file_bytes:
+            return pd.DataFrame()
+
+        try:
+            # Iterating the reader yields the decoded records one by one.
+            records = list(self.fastavro.reader(io.BytesIO(file_bytes)))
+            if not records:
+                return pd.DataFrame()
+            return pd.DataFrame(records)
+        except Exception as e:
+            # Best effort: a malformed upload must not crash the caller.
+            print(f"⚠️ Avro Read Error: {e}")
+            return pd.DataFrame()
\ No newline at end of file
diff --git a/file_handlers/json_handler.py b/file_handlers/json_handler.py
new file mode 100644
index 0000000000000000000000000000000000000000..2736d9166fbfa5190751f6e4c3359f738d3cbd9f
--- /dev/null
+++ b/file_handlers/json_handler.py
@@ -0,0 +1,52 @@
+import json
+import pandas as pd
+import io
+
+
+class JsonHandler:
+    """Load JSON documents and flatten them into a pandas DataFrame."""
+
+    def __init__(self):
+        print("✅ JSON Handler loaded.")
+
+    @staticmethod
+    def _flatten(x, name=''):
+        """Recursively flatten one JSON value into a single-level dict.
+
+        Nested dict keys are joined with ``_``; a list is not expanded but
+        stored as its string form under ``<prefix>list``.
+        """
+        if isinstance(x, dict):
+            out = {}
+            for key, value in x.items():
+                out.update(JsonHandler._flatten(value, f"{name}{key}_"))
+            return out
+        if isinstance(x, list):
+            return {f"{name}list": str(x)}
+        # Scalar leaf: drop the trailing '_' that the parent appended.
+        return {name[:-1]: x}
+
+    def read_file(self, file_obj) -> pd.DataFrame:
+        """Read a JSON file object (or Streamlit UploadedFile) and flatten it.
+
+        :param file_obj: Object exposing ``getvalue()`` (returning bytes or
+            str), or any file object accepted by ``json.load``.
+        :return: One row per top-level list element, or a single row for any
+            other top-level value. Empty DataFrame if reading/parsing fails.
+        """
+        try:
+            if hasattr(file_obj, "getvalue"):
+                content = file_obj.getvalue()
+                if isinstance(content, (bytes, bytearray)):
+                    # 'utf-8-sig' also accepts files that start with a BOM.
+                    content = content.decode('utf-8-sig')
+                data = json.loads(content)
+            else:
+                data = json.load(file_obj)
+
+            if isinstance(data, list):
+                return pd.DataFrame([self._flatten(x) for x in data])
+            return pd.DataFrame([self._flatten(data)])
+        except Exception as e:
+            print(f"❌ JSON Read Error: {e}")
+            return pd.DataFrame()
\ No newline at end of file
diff --git a/file_handlers/ocr_engine.py b/file_handlers/ocr_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..ee6054b74213ce70f86e95063b7372b28322f17b
--- /dev/null
+++ b/file_handlers/ocr_engine.py
@@ -0,0 +1,49 @@
+# ocr_engine.py
+import io
+
+from PIL import Image
+
+try:
+    import pytesseract
+except ImportError:
+    # Keep this module importable; OcrEngine reports itself as unavailable.
+    pytesseract = None
+
+
+class OcrEngine:
+    """Thin wrapper around Tesseract (via pytesseract) for image-to-text."""
+
+    def __init__(self):
+        """
+        Initializes the OCR Engine using Tesseract.
+        """
+        self.available = False
+        try:
+            if pytesseract is None:
+                raise ImportError("pytesseract is not installed")
+            # Check availability by querying version
+            pytesseract.get_tesseract_version()
+            print("✅ Tesseract OCR Engine loaded.")
+            self.available = True
+        except Exception as e:
+            print(f"❌ Tesseract OCR not found: {e}")
+            print("👉 Install Tesseract system-wide (e.g., 'apt-get install tesseract-ocr') and 'pip install pytesseract'.")
+
+    def extract_text(self, image_bytes: bytes) -> str:
+        """
+        Converts image bytes to text.
+
+        :param image_bytes: Encoded image data in any format Pillow can open.
+        :return: Recognised text, or "" when OCR is unavailable, the input is
+            empty, or the image cannot be processed.
+        """
+        if not self.available or not image_bytes:
+            return ""
+
+        try:
+            # The context manager releases the decoded image once OCR is done.
+            with Image.open(io.BytesIO(image_bytes)) as image:
+                return pytesseract.image_to_string(image)
+        except Exception as e:
+            print(f"⚠️ OCR Extraction Error: {e}")
+            return ""
\ No newline at end of file
diff --git a/file_handlers/parquet_handler.py b/file_handlers/parquet_handler.py
new file mode 100644
index 0000000000000000000000000000000000000000..009823af582d64da0f2629b444d6e9ba5901d43c
--- /dev/null
+++ b/file_handlers/parquet_handler.py
@@ -0,0 +1,34 @@
+import io
+import pandas as pd
+
+
+class ParquetHandler:
+    """Convert Parquet files into pandas DataFrames using PyArrow."""
+
+    def __init__(self):
+        self.available = False
+        try:
+            # Imported only to verify that the PyArrow engine is installed.
+            import pyarrow.parquet  # noqa: F401
+            self.available = True
+            print("✅ Parquet Handler loaded.")
+        except ImportError:
+            print("❌ PyArrow not found. Please run: pip install pyarrow")
+
+    def convert_to_dataframe(self, file_bytes: bytes) -> pd.DataFrame:
+        """Read Parquet bytes and convert them to a pandas DataFrame.
+
+        :param file_bytes: Raw contents of a Parquet file.
+        :return: The decoded table, or an empty DataFrame when PyArrow is
+            unavailable, the input is empty, or the file cannot be read.
+        """
+        if not self.available or not file_bytes:
+            return pd.DataFrame()
+
+        try:
+            # Pin the engine that __init__ checked for instead of relying on
+            # pandas' automatic engine selection.
+            return pd.read_parquet(io.BytesIO(file_bytes), engine="pyarrow")
+        except Exception as e:
+            print(f"⚠️ Parquet Read Error: {e}")
+            return pd.DataFrame()
\ No newline at end of file
diff --git a/file_handlers/pdf_handler.py b/file_handlers/pdf_handler.py
new file mode 100644
index 0000000000000000000000000000000000000000..a46e2597bb58e51ced02c4412a4f901ba8a70279
--- /dev/null
+++ b/file_handlers/pdf_handler.py
@@ -0,0 +1,95 @@
+import fitz  # PyMuPDF
+import io
+from typing import Optional
+
+
+class PdfHandler:
+    """Page-level text extraction and PII highlighting for PDF bytes."""
+
+    def __init__(self, ocr_engine):
+        """
+        :param ocr_engine: Instance of OcrEngine to handle scanned pages.
+        """
+        self.ocr_engine = ocr_engine
+        print("✅ PDF Handler loaded.")
+
+    def get_total_pages(self, file_bytes: bytes) -> int:
+        """
+        Returns the number of pages, or 0 if the bytes cannot be opened as a PDF.
+        """
+        try:
+            # The context manager closes the document once the count is read.
+            with fitz.open(stream=file_bytes, filetype="pdf") as doc:
+                return len(doc)
+        except Exception as e:
+            print(f"PDF Page Count Error: {e}")
+            return 0
+
+    def get_page_text(self, file_bytes: bytes, page_num: int) -> str:
+        """
+        Extracts text from a specific page. Falls back to OCR if text is empty.
+
+        :param file_bytes: Raw PDF contents.
+        :param page_num: Zero-based page index.
+        :return: Page text, or "" for an out-of-range page or on any error.
+        """
+        try:
+            with fitz.open(stream=file_bytes, filetype="pdf") as doc:
+                if not (0 <= page_num < len(doc)):
+                    return ""
+
+                page = doc[page_num]
+                text = page.get_text("text")
+
+                # OCR Fallback for scanned PDFs
+                if not text.strip() and self.ocr_engine.available:
+                    print(f"⚠️ Page {page_num+1} appears empty/scanned. Running OCR...")
+                    img_bytes = page.get_pixmap().tobytes("png")
+                    text = self.ocr_engine.extract_text(img_bytes)
+
+                return text
+        except Exception as e:
+            print(f"PDF Text Error: {e}")
+            return ""
+
+    def render_labeled_image(self, file_bytes: bytes, page_num: int, matches: list, color_map: dict) -> Optional[bytes]:
+        """
+        Draws bounding boxes around detected PII on the PDF page image.
+
+        :param file_bytes: Raw PDF contents.
+        :param page_num: Zero-based page index.
+        :param matches: Dicts with a 'text' entry (string searched for on the
+            page) and a 'label' entry (caption drawn above each hit).
+        :param color_map: Currently unused; every box is drawn in red.
+        :return: PNG bytes of the annotated page, or None for an out-of-range
+            page or on any error.
+        """
+        try:
+            with fitz.open(stream=file_bytes, filetype="pdf") as doc:
+                if not (0 <= page_num < len(doc)):
+                    return None
+
+                page = doc[page_num]
+                color_norm = (1, 0, 0)  # Default Red (PyMuPDF takes 0-1 RGB)
+
+                for m in matches or []:
+                    needle = m.get('text')
+                    if not needle:
+                        # Nothing to search for; skip instead of failing the page.
+                        continue
+                    label = str(m.get('label', ''))
+
+                    # Search for the text string on the page
+                    for rect in page.search_for(needle):
+                        # Draw Box
+                        page.draw_rect(rect, color=color_norm, width=1.5, fill=color_norm, fill_opacity=0.2)
+                        # Add Label
+                        if label:
+                            page.insert_text(fitz.Point(rect.x0, rect.y0-2), label, fontsize=6, color=(0,0,0))
+
+                # Render page to image
+                pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))  # Zoom=2 for higher quality
+                return pix.tobytes("png")
+        except Exception as e:
+            print(f"PDF Render Error: {e}")
+            return None
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index ebab1ca6e4d7ea86dd1faadde8ca22ab98eb8c23..a71b6e41a3778f06becb6c80def5dbe0ecc7a0f4 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,6 @@
 pypdf
+pymupdf
+beautifulsoup4
 pandas
 streamlit
 nltk
@@ -14,4 +16,10 @@ spacy
 boto3
 presidio-analyzer
 azure-storage-blob
-google-cloud-storage
\ No newline at end of file
+google-cloud-storage
+pytesseract
+Pillow
+fastavro
+gliner
+slack_sdk
+atlassian-python-api
\ No newline at end of file
diff --git a/token.pickle b/token.pickle
new file mode 100644
index 0000000000000000000000000000000000000000..e26fa18b87413893ddedeb225f4d9301f02a9d84
Binary files /dev/null and b/token.pickle differ
diff --git a/ui.py b/ui.py
index 26d57dd4048e50e7e1fe86371ba21cc02fbd51b7..d8e168bc0a67cead5572838c7dd2bb49204a51ba 100644
--- a/ui.py
+++ b/ui.py
@@ -13,61 +13,14 @@ def main():
         st.session_state.classifier = RegexClassifier()
     if 'page_number' not in st.session_state:
         st.session_state.page_number = 0
-    
     if 'last_accuracy' not in st.session_state:
-        st.session_state.last_accuracy = {"🛠️ Regex": 0, "🧠 NLTK": 0, "🤖 SpaCy": 0, "🛡️ Presidio": 0}
+        st.session_state.last_accuracy = {"🛠️ Regex": 0, "🧠 NLTK": 0, "🤖 SpaCy": 0, "🛡️ Presidio": 0, "🦅 GLiNER": 0}
 
     classifier = st.session_state.classifier
 
-    with st.sidebar:
-        st.header("1. Source Selection")
-        main_category = st.selectbox("Select System", ["File System", "Databases", "Cloud Storage"])
-        source = None
-        file_sub_type = None
-        
-        if main_category == "File System":
-            struct_type = st.radio("Data Type", ["Structured Data", "Unstructured Data"])
-            if struct_type == "Structured Data":
-                file_sub_type = st.selectbox("File Format", ["CSV", "JSON", "Parquet"])
-            else:
-                file_sub_type = st.selectbox("File Format", ["PDF"])
-            source = "File Upload"
-
-        elif main_category == "Databases":
-            db_type = st.radio("Database Type", ["Relational (SQL)", "Non-Relational (NoSQL)"])
-            if db_type == "Relational (SQL)":
-                db_icons = {"PostgreSQL": "🐘 PostgreSQL", "MySQL": "🐬 MySQL"}
-                source = st.selectbox("Select Database", ["PostgreSQL", "MySQL"], format_func=lambda x: db_icons.get(x))
-            else:
-                db_icons = {"MongoDB": "🍃 MongoDB"}
-                source = st.selectbox("Select Database", ["MongoDB"], format_func=lambda x: db_icons.get(x))
-
-        elif main_category == "Cloud Storage":
-            source = st.selectbox("Service", ["Google Drive", "AWS S3", "Azure Blob Storage", "Google Cloud Storage"])
-
-        st.divider()
-        st.header("2. Patterns")
-        patterns = classifier.list_patterns()
-        ordered_keys = ["EMAIL", "FIRST_NAME", "LAST_NAME", "PHONE", "SSN", "CREDIT_CARD"]
-        display_patterns = {k: patterns.get(k, "NLTK/SpaCy/Presidio") for k in ordered_keys if k in patterns or k in ["FIRST_NAME", "LAST_NAME"]}
-        for k, v in patterns.items():
-            if k not in display_patterns: display_patterns[k] = v     
-        st.dataframe(pd.DataFrame(list(display_patterns.items()), columns=["Name", "Regex/Method"]), hide_index=True)
-        
-        with st.expander("➕ Add Pattern"):
-            new_name = st.text_input("Name")
-            new_regex = st.text_input("Regex")
-            if st.button("Add"):
-                classifier.add_pattern(new_name, new_regex)
-                st.rerun()
-
-        with st.expander("🗑️ Remove Pattern"):
-            pattern_to_remove = st.selectbox("Select Pattern", options=list(patterns.keys()))
-            if st.button("Remove Selected"):
-                classifier.remove_pattern(pattern_to_remove)
-                st.rerun()
-
+    # ================= HELPER FUNCTIONS =================
     def render_source_header(title, logo_url):
+        """Helper to render headers consistently"""
         col1, col2 = st.columns([0.1, 0.9])
         with col1:
             if logo_url: st.image(logo_url, width=50)
@@ -83,15 +36,12 @@ def main():
             if results_df.empty:
                 st.info("No PII detected by any model.")
                 return
-            display_df = results_df[["Model", "Detected PII", "Missed PII"]]
-            st.table(display_df)
+            st.table(results_df[["Model", "Detected PII", "Missed PII"]])
             col1, col2 = st.columns([2, 1])
             with col1:
-                st.markdown("**Model Accuracy Graph**")
                 fig = px.bar(results_df, x="Accuracy", y="Model", orientation='h', color="Model", text_auto='.2%', range_x=[0,1])
                 st.plotly_chart(fig, use_container_width=True)
             with col2:
-                st.markdown("**Efficiency Gain**")
                 for index, row in results_df.iterrows():
                     model = row['Model']
                     current_acc = row['Accuracy']
@@ -104,8 +54,7 @@ def main():
         if source_df is not None and not source_df.empty:
             st.markdown("### 🧬 Data Schema Detected")
             with st.expander("View Column Types & Samples", expanded=False):
-                schema_df = classifier.get_data_schema(source_df)
-                st.dataframe(schema_df, use_container_width=True, hide_index=True)
+                st.dataframe(classifier.get_data_schema(source_df), use_container_width=True, hide_index=True)
             st.divider()
         st.markdown("### 📊 PII Analytics")
         if count_df.empty:
@@ -118,43 +67,100 @@ def main():
         with c2:
             st.dataframe(count_df, hide_index=True, use_container_width=True)
 
-    if source == "File Upload":
-        ext_map = {"PDF": ["pdf"], "CSV": ["csv"], "JSON": ["json"], "Parquet": ["parquet", "pqt"]}
-        accepted_exts = ext_map.get(file_sub_type, [])
-        st.subheader(f"📂 {file_sub_type} Analysis")
-        uploaded_file = st.file_uploader(f"Upload {file_sub_type}", type=accepted_exts)
+    # ================= SIDEBAR =================
+    with st.sidebar:
+        st.header("1. Source Selection")
+        main_category = st.selectbox("Select System", ["File System", "Databases", "Cloud Storage", "Enterprise Connectors"])
+        source = None
+        file_sub_type = None
         
+        if main_category == "File System":
+            struct_type = st.radio("Data Type", ["Structured Data", "Unstructured Data"])
+            if struct_type == "Structured Data":
+                file_sub_type = st.selectbox("File Format", ["CSV", "JSON", "Parquet", "Apache Avro"])
+            else:
+                file_sub_type = st.selectbox("File Format", ["PDF", "Image (OCR)"])
+            source = "File Upload"
+        
+        elif main_category == "Databases":
+            db_type = st.radio("Database Type", ["Relational (SQL)", "Non-Relational (NoSQL)"])
+            if db_type == "Relational (SQL)":
+                db_icons = {"PostgreSQL": "🐘 PostgreSQL", "MySQL": "🐬 MySQL"}
+                source = st.selectbox("Select Database", ["PostgreSQL", "MySQL"], format_func=lambda x: db_icons.get(x))
+            else:
+                db_icons = {"MongoDB": "🍃 MongoDB"}
+                source = st.selectbox("Select Database", ["MongoDB"], format_func=lambda x: db_icons.get(x))
+        elif main_category == "Cloud Storage":
+            source = st.selectbox("Service", ["Google Drive", "AWS S3", "Azure Blob Storage", "Google Cloud Storage"])
+        
+        elif main_category == "Enterprise Connectors":
+            source = st.selectbox("Platform", ["Gmail", "Slack", "Confluence"])
+
+        st.divider()
+        st.header("2. Patterns")
+        patterns = classifier.list_patterns()
+        st.dataframe(pd.DataFrame(list(patterns.items()), columns=["Name", "Regex"]), hide_index=True)
+        
+        with st.expander("➕ Add Pattern"):
+            new_name = st.text_input("Name")
+            new_regex = st.text_input("Regex")
+            if st.button("Add"):
+                classifier.add_pattern(new_name, new_regex)
+                st.rerun()
+        with st.expander("🗑️ Remove Pattern"):
+            pattern_to_remove = st.selectbox("Select Pattern", options=list(patterns.keys()))
+            if st.button("Remove Selected"):
+                classifier.remove_pattern(pattern_to_remove)
+                st.rerun()
+
+    # ================= MAIN LOGIC =================
+    
+    # 1. FILE UPLOAD
+    if source == "File Upload":
+        uploaded_file = st.file_uploader(f"Upload {file_sub_type}")
         if uploaded_file:
             mask_mode = st.checkbox("🔒 Enable PII Masking")
+            file_bytes = uploaded_file.getvalue()
+            
             if file_sub_type == 'PDF':
-                file_bytes = uploaded_file.getvalue()
-                current_text = classifier.get_pdf_page_text(file_bytes, st.session_state.page_number)
-                count_df = classifier.get_pii_counts(current_text)
+                text = classifier.get_pdf_page_text(file_bytes, st.session_state.page_number)
+                count_df = classifier.get_pii_counts(text)
                 render_analytics(count_df, None)
-                render_inspector(current_text)
+                render_inspector(text)
                 
                 total_pages = classifier.get_pdf_total_pages(file_bytes)
-                c1, c2, c3 = st.columns([1, 2, 1])
-                with c1: 
-                    if st.button("⬅️ Prev") and st.session_state.page_number > 0: st.session_state.page_number -= 1
-                with c3:
-                    if st.button("Next ➡️") and st.session_state.page_number < total_pages - 1: st.session_state.page_number += 1
+                c1, c2, c3 = st.columns([1,2,1])
+                if c1.button("Prev"): st.session_state.page_number = max(0, st.session_state.page_number - 1)
+                if c3.button("Next"): st.session_state.page_number = min(total_pages-1, st.session_state.page_number + 1)
                 
                 st.markdown(f"**Viewing Page {st.session_state.page_number + 1} of {total_pages}**")
                 img = classifier.get_labeled_pdf_image(file_bytes, st.session_state.page_number)
                 if img: st.image(img, use_container_width=True)
-            else:
-                if file_sub_type == 'Parquet': df = classifier.get_parquet_data(uploaded_file.getvalue())
-                elif file_sub_type == 'CSV': df = pd.read_csv(uploaded_file)
-                else: df = classifier.get_json_data(uploaded_file) 
+            
+            elif file_sub_type == 'Image (OCR)':
+                st.image(uploaded_file, width=400)
+                with st.spinner("Running OCR..."):
+                    text = classifier.get_ocr_text_from_image(file_bytes)
+                if text:
+                    df = pd.DataFrame({"Content": [text]})
+                    render_analytics(classifier.get_pii_counts_dataframe(df), df)
+                    render_inspector(text)
+                    if mask_mode: st.dataframe(classifier.mask_dataframe(df))
+                    else: st.markdown(classifier.scan_dataframe_with_html(df).to_html(escape=False), unsafe_allow_html=True)
+                else: st.warning("No text extracted.")
 
+            else: # Structured
+                if file_sub_type == 'Parquet': df = classifier.get_parquet_data(file_bytes)
+                elif file_sub_type == 'Apache Avro': df = classifier.get_avro_data(file_bytes)
+                elif file_sub_type == 'CSV': df = pd.read_csv(io.BytesIO(file_bytes))
+                else: df = classifier.get_json_data(uploaded_file)
+                
                 render_analytics(classifier.get_pii_counts_dataframe(df), df)
-                sample_text = df.head(10).to_string()
-                render_inspector(sample_text)
-
+                render_inspector(df.head(10).to_string())
                 if mask_mode: st.dataframe(classifier.mask_dataframe(df).head(50))
                 else: st.markdown(classifier.scan_dataframe_with_html(df.head(50)).to_html(escape=False), unsafe_allow_html=True)
 
+    # 2. DATABASES
     elif source in ["PostgreSQL", "MySQL", "MongoDB"]:
         db_logos = {
             "PostgreSQL": "https://upload.wikimedia.org/wikipedia/commons/2/29/Postgresql_elephant.svg",
@@ -184,155 +190,162 @@ def main():
         if 'db_data' in st.session_state:
             df = st.session_state.db_data
             render_analytics(classifier.get_pii_counts_dataframe(df), df)
-            sample_text = df.head(10).to_string()
-            render_inspector(sample_text)
+            render_inspector(df.head(10).to_string())
             st.dataframe(classifier.mask_dataframe(df))
 
-    elif source == "Google Drive":
-        render_source_header("Google Drive Import", "https://upload.wikimedia.org/wikipedia/commons/d/da/Google_Drive_logo.png")
-        st.info("Upload your Service Account JSON to connect dynamically.")
-        creds_file = st.file_uploader("Upload credentials.json", type=['json'])
-        if creds_file:
-            creds_dict = json.load(creds_file)
-            st.session_state.creds_dict = creds_dict
-            st.success("Credentials Loaded!")
-            if st.button("📂 List Files"):
-                st.session_state.drive_files = classifier.get_google_drive_files(creds_dict)
-        if 'drive_files' in st.session_state:
-            files = st.session_state.drive_files
-            if not files: st.warning("No files found.")
-            else:
-                file_map = {f['name']: f for f in files}
-                selected_name = st.selectbox("Select File", list(file_map.keys()))
-                if st.button("⬇️ Scan File"):
-                    sel_file = file_map[selected_name]
-                    content = classifier.download_drive_file(sel_file['id'], sel_file.get('mimeType', ''), st.session_state.creds_dict)
-                    if not content: st.error("Failed to read.")
-                    else:
-                        st.success(f"Scanning {selected_name}...")
-                        # Reuse scan logic ...
-                        # (omitted for brevity, same as S3)
+    # 3. CLOUD STORAGE
+    elif source in ["Google Drive", "AWS S3", "Azure Blob Storage", "Google Cloud Storage"]:
+        logos = {
+            "Google Drive": "https://upload.wikimedia.org/wikipedia/commons/d/da/Google_Drive_logo.png",
+            "AWS S3": "https://upload.wikimedia.org/wikipedia/commons/9/93/Amazon_Web_Services_Logo.svg",
+            "Azure Blob Storage": "https://upload.wikimedia.org/wikipedia/commons/f/fa/Microsoft_Azure.svg",
+            "Google Cloud Storage": "https://upload.wikimedia.org/wikipedia/commons/5/51/Google_Cloud_logo.svg"
+        }
+        render_source_header(f"{source} Import", logos.get(source, ""))
+        
+        # --- GOOGLE DRIVE ---
+        if source == "Google Drive":
+            st.info("Upload Service Account JSON")
+            creds_file = st.file_uploader("credentials.json", type=['json'], key="gdrive")
+            if creds_file:
+                creds_dict = json.load(creds_file)
+                if st.button("📂 List Files"):
+                    st.session_state.drive_files = classifier.get_google_drive_files(creds_dict)
+                    st.session_state.gdrive_creds = creds_dict
+            
+            if 'drive_files' in st.session_state:
+                file_map = {f['name']: f for f in st.session_state.drive_files}
+                sel = st.selectbox("Select File", list(file_map.keys()))
+                if st.button("⬇️ Scan"):
+                    content = classifier.download_drive_file(file_map[sel]['id'], file_map[sel]['mimeType'], st.session_state.gdrive_creds)
+                    if isinstance(content, bytes):
+                        try:
+                            txt = content.decode('utf-8')
+                            df = pd.DataFrame({"Content": [txt]})
+                            render_analytics(classifier.get_pii_counts_dataframe(df), df)
+                            render_inspector(txt)
+                            st.markdown(classifier.scan_dataframe_with_html(df).to_html(escape=False), unsafe_allow_html=True)
+                        except: st.warning("Binary file downloaded.")
 
-    elif source == "AWS S3":
-        render_source_header("AWS S3 Import", "https://upload.wikimedia.org/wikipedia/commons/9/93/Amazon_Web_Services_Logo.svg")
-        c1, c2, c3 = st.columns(3)
-        aws_access = c1.text_input("Access Key ID")
-        aws_secret = c2.text_input("Secret Access Key", type="password")
-        aws_region = c3.text_input("Region", "us-east-1")
+        # --- AWS S3 ---
+        elif source == "AWS S3":
+            c1, c2, c3 = st.columns(3)
+            aws_a = c1.text_input("Access Key")
+            aws_s = c2.text_input("Secret Key", type="password")
+            aws_r = c3.text_input("Region", "us-east-1")
+            if st.button("Connect"):
+                st.session_state.s3_buckets = classifier.get_s3_buckets(aws_a, aws_s, aws_r)
+                st.session_state.aws_creds = (aws_a, aws_s, aws_r)
+            
+            if 's3_buckets' in st.session_state:
+                sel_b = st.selectbox("Bucket", st.session_state.s3_buckets)
+                if st.button("List"):
+                    st.session_state.s3_files = classifier.get_s3_files(*st.session_state.aws_creds, sel_b)
+                if 's3_files' in st.session_state:
+                    sel_f = st.selectbox("File", st.session_state.s3_files)
+                    if st.button("Scan"):
+                        content = classifier.download_s3_file(*st.session_state.aws_creds, sel_b, sel_f)
+                        try:
+                            df = pd.read_csv(io.BytesIO(content))
+                            render_analytics(classifier.get_pii_counts_dataframe(df), df)
+                            st.dataframe(classifier.mask_dataframe(df))
+                        except: st.error("Only CSV supported.")
 
-        if st.button("🔗 Connect to AWS"):
-            buckets = classifier.get_s3_buckets(aws_access, aws_secret, aws_region)
-            if buckets:
-                st.session_state.aws_creds = (aws_access, aws_secret, aws_region)
-                st.session_state.s3_buckets = buckets
-                st.success(f"Connected! Found {len(buckets)} buckets.")
-            else: st.error("Connection Failed.")
+        # --- AZURE ---
+        elif source == "Azure Blob Storage":
+            conn = st.text_input("Connection String", type="password")
+            if st.button("Connect"):
+                st.session_state.az_conts = classifier.get_azure_containers(conn)
+                st.session_state.az_conn = conn
+            
+            if 'az_conts' in st.session_state:
+                sel_c = st.selectbox("Container", st.session_state.az_conts)
+                if st.button("List"):
+                    st.session_state.az_blobs = classifier.get_azure_blobs(st.session_state.az_conn, sel_c)
+                if 'az_blobs' in st.session_state:
+                    sel_b = st.selectbox("Blob", st.session_state.az_blobs)
+                    if st.button("Scan"):
+                        content = classifier.download_azure_blob(st.session_state.az_conn, sel_c, sel_b)
+                        try:
+                            df = pd.read_csv(io.BytesIO(content))
+                            render_analytics(classifier.get_pii_counts_dataframe(df), df)
+                            st.dataframe(classifier.mask_dataframe(df))
+                        except: st.error("Only CSV supported.")
 
-        if 's3_buckets' in st.session_state:
-            selected_bucket = st.selectbox("Select Bucket", st.session_state.s3_buckets)
-            if st.button("📂 List Files"):
-                creds = st.session_state.aws_creds
-                st.session_state.s3_files = classifier.get_s3_files(creds[0], creds[1], creds[2], selected_bucket)
+        # --- GCP ---
+        elif source == "Google Cloud Storage":
+            st.info("Upload Service Account JSON")
+            gcp_f = st.file_uploader("service-account.json", type=['json'], key="gcp")
+            if gcp_f:
+                creds = json.load(gcp_f)
+                if st.button("Connect"):
+                    st.session_state.gcp_buckets = classifier.get_gcs_buckets(creds)
+                    st.session_state.gcp_creds = creds
             
-            if 's3_files' in st.session_state and st.session_state.s3_files:
-                selected_file = st.selectbox("Select File", st.session_state.s3_files)
-                if st.button("⬇️ Download & Scan"):
-                    creds = st.session_state.aws_creds
-                    file_content = classifier.download_s3_file(creds[0], creds[1], creds[2], selected_bucket, selected_file)
-                    # ... run scan logic ...
+            if 'gcp_buckets' in st.session_state:
+                sel_b = st.selectbox("Bucket", st.session_state.gcp_buckets)
+                if st.button("List"):
+                    st.session_state.gcp_files = classifier.get_gcs_files(st.session_state.gcp_creds, sel_b)
+                if 'gcp_files' in st.session_state:
+                    sel_f = st.selectbox("File", st.session_state.gcp_files)
+                    if st.button("Scan"):
+                        content = classifier.download_gcs_file(st.session_state.gcp_creds, sel_b, sel_f)
+                        try:
+                            df = pd.read_csv(io.BytesIO(content))
+                            render_analytics(classifier.get_pii_counts_dataframe(df), df)
+                            st.dataframe(classifier.mask_dataframe(df))
+                        except: st.error("Only CSV supported.")
 
-    elif source == "Azure Blob Storage":
-        render_source_header("Azure Blob Storage Import", "https://upload.wikimedia.org/wikipedia/commons/f/fa/Microsoft_Azure.svg")
+    # 4. ENTERPRISE CONNECTORS (NEW)
+    elif source == "Gmail":
+        render_source_header("Gmail Scanner", "https://upload.wikimedia.org/wikipedia/commons/7/7e/Gmail_icon_%282020%29.svg")
+        st.info("Upload your OAuth 2.0 Client Secret JSON (Desktop App).")
+        uploaded_file = st.file_uploader("Upload client_secret.json", type=['json'], key="gmail_secret")
+        num_emails = st.slider("Number of recent emails to scan", 5, 50, 10)
         
-        st.info("Get your Connection String from Azure Portal -> Storage Account -> Access keys.")
-        conn_str = st.text_input("Connection String", type="password")
+        if uploaded_file and st.button("Authenticate & Scan"):
+            with st.spinner("Authenticating..."):
+                try:
+                    df = classifier.get_gmail_data(uploaded_file, num_emails)
+                    if not df.empty:
+                        st.session_state.gmail_data = df
+                        st.success("Fetched!")
+                except Exception as e: st.error(f"Error: {e}")
 
-        if st.button("🔗 Connect to Azure"):
-            containers = classifier.get_azure_containers(conn_str)
-            if containers:
-                st.session_state.azure_conn = conn_str
-                st.session_state.azure_containers = containers
-                st.success(f"Connected! Found {len(containers)} containers.")
-            else:
-                st.error("Connection Failed. Check your string.")
+        if 'gmail_data' in st.session_state:
+            df = st.session_state.gmail_data
+            render_analytics(classifier.get_pii_counts_dataframe(df), df)
+            render_inspector(df.iloc[0]['Content'])
+            st.dataframe(classifier.mask_dataframe(df))
 
-        if 'azure_containers' in st.session_state:
-            selected_container = st.selectbox("Select Container", st.session_state.azure_containers)
-            if st.button("📂 List Blobs"):
-                st.session_state.azure_blobs = classifier.get_azure_blobs(st.session_state.azure_conn, selected_container)
-            if 'azure_blobs' in st.session_state and st.session_state.azure_blobs:
-                selected_blob = st.selectbox("Select Blob", st.session_state.azure_blobs)
-                if st.button("⬇️ Download & Scan"):
-                    file_content = classifier.download_azure_blob(st.session_state.azure_conn, selected_container, selected_blob)
-                    # ... run scan logic ...
+    elif source == "Slack":
+        render_source_header("Slack Scanner", "https://upload.wikimedia.org/wikipedia/commons/d/d5/Slack_icon_2019.svg")
+        token = st.text_input("Bot User OAuth Token (xoxb-...)")
+        channel = st.text_input("Channel ID")
+        if st.button("Scan Channel"):
+            with st.spinner("Fetching messages..."):
+                df = classifier.get_slack_messages(token, channel)
+                if not df.empty:
+                    render_analytics(classifier.get_pii_counts_dataframe(df), df)
+                    render_inspector(df.iloc[0]['Content'])
+                    st.dataframe(classifier.mask_dataframe(df))
+                else: st.error("No messages found or auth failed.")
 
-    # --- GCP BUCKETS LOGIC (NEW) ---
-    elif source == "Google Cloud Storage":
-        render_source_header("Google Cloud Storage Import", "https://upload.wikimedia.org/wikipedia/commons/5/51/Google_Cloud_logo.svg")
+    elif source == "Confluence":
+        render_source_header("Confluence Scanner", "https://upload.wikimedia.org/wikipedia/commons/8/88/Atlassian_Confluence_Logo.svg")
+        url = st.text_input("Confluence URL (https://your-domain.atlassian.net)")
+        user = st.text_input("Username (Email)")
+        token = st.text_input("API Token", type="password")
+        page_id = st.text_input("Page ID")
         
-        st.info("Upload your GCP Service Account JSON key (must have Storage Object Viewer role).")
-        gcp_creds_file = st.file_uploader("Upload service-account.json", type=['json'], key="gcp_upload")
-        
-        if gcp_creds_file:
-            gcp_creds = json.load(gcp_creds_file)
-            st.session_state.gcp_creds = gcp_creds
-            st.success("GCP Credentials Loaded!")
-            
-            if st.button("🔗 Connect & List Buckets"):
-                buckets = classifier.get_gcs_buckets(gcp_creds)
-                if buckets:
-                    st.session_state.gcs_buckets = buckets
-                    st.success(f"Connected! Found {len(buckets)} buckets.")
-                else:
-                    st.error("Connection Failed or No Buckets found.")
-
-        if 'gcs_buckets' in st.session_state:
-            selected_bucket = st.selectbox("Select Bucket", st.session_state.gcs_buckets)
-            
-            if st.button("📂 List Files in Bucket"):
-                st.session_state.gcs_files = classifier.get_gcs_files(st.session_state.gcp_creds, selected_bucket)
-            
-            if 'gcs_files' in st.session_state and st.session_state.gcs_files:
-                selected_file = st.selectbox("Select File", st.session_state.gcs_files)
-                
-                if st.button("⬇️ Download & Scan"):
-                    file_content = classifier.download_gcs_file(st.session_state.gcp_creds, selected_bucket, selected_file)
-                    
-                    if not file_content:
-                        st.error("Failed to download file.")
-                    else:
-                        st.success(f"Scanning {selected_file}...")
-                        mask_mode = st.checkbox("🔒 Mask Results", value=False, key="gcs_mask")
-                        
-                        ext = selected_file.split('.')[-1].lower()
-                        
-                        # Reuse Scan Logic (Same as AWS/Azure)
-                        if ext == 'pdf':
-                            text = classifier.get_pdf_page_text(file_content, 0)
-                            render_analytics(classifier.get_pii_counts(text), None)
-                            render_inspector(text)
-                            img = classifier.get_labeled_pdf_image(file_content, 0)
-                            if img: st.image(img, caption="Page 1 Preview")
-                        elif ext == 'csv':
-                            df = pd.read_csv(io.BytesIO(file_content))
-                            render_analytics(classifier.get_pii_counts_dataframe(df), df)
-                            render_inspector(df.head(10).to_string())
-                            if mask_mode: st.dataframe(classifier.mask_dataframe(df))
-                            else: st.markdown(classifier.scan_dataframe_with_html(df).to_html(escape=False), unsafe_allow_html=True)
-                        elif ext == 'json':
-                            df = classifier.get_json_data(io.BytesIO(file_content))
-                            render_analytics(classifier.get_pii_counts_dataframe(df), df)
-                            render_inspector(df.head(10).to_string())
-                            if mask_mode: st.dataframe(classifier.mask_dataframe(df))
-                            else: st.markdown(classifier.scan_dataframe_with_html(df).to_html(escape=False), unsafe_allow_html=True)
-                        elif ext in ['parquet', 'pqt']:
-                            df = classifier.get_parquet_data(file_content)
-                            render_analytics(classifier.get_pii_counts_dataframe(df), df)
-                            render_inspector(df.head(10).to_string())
-                            if mask_mode: st.dataframe(classifier.mask_dataframe(df))
-                            else: st.markdown(classifier.scan_dataframe_with_html(df).to_html(escape=False), unsafe_allow_html=True)
-            elif 'gcs_files' in st.session_state:
-                st.warning("Bucket is empty.")
+        if st.button("Scan Page"):
+            with st.spinner("Fetching page..."):
+                df = classifier.get_confluence_page(url, user, token, page_id)
+                if not df.empty:
+                    render_analytics(classifier.get_pii_counts_dataframe(df), df)
+                    render_inspector(df.iloc[0]['Content'])
+                    st.markdown(classifier.scan_dataframe_with_html(df).to_html(escape=False), unsafe_allow_html=True)
+                else: st.error("Failed to fetch page.")
 
 if __name__ == "__main__":
     main()
\ No newline at end of file