Spaces:

acecalisto3
/

Dbgod

Sleeping

App Files Files Community

google-labs-jules[bot] committed on Mar 2

Commit

f4c4bbd

1 Parent(s): 5caa7cf

Fix SyntaxError in app.py and 2app.py

Browse files

Files changed (8) hide show

.gitattributes +35 -0
2app.py +449 -0
Dockerfile +25 -0
README.md +12 -0
app.py +960 -0
huggingface.yml +3 -0
nltk_setup.py +53 -0
requirements.txt +17 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,35 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

2app.py ADDED Viewed

	@@ -0,0 +1,449 @@

+import platform
+import os
+import sqlite3
+import uuid
+import datetime
+import shutil
+import traceback
+import logging
+from pathlib import Path
+from abc import ABC, abstractmethod
+from typing import Dict, Any, List
+import gradio as gr
+import pandas as pd
+# --- Base Classes ---
+class Interface(ABC):
+    """Abstract base class declaring a single required method, ``launch``."""
+    @abstractmethod
+    def launch(self):
+        """Abstract hook; concrete subclasses must override it to be instantiable."""
+        pass
+class Command(ABC):
+    """Abstract base class declaring a single required method, ``execute``."""
+    @abstractmethod
+    def execute(self):
+        """Abstract hook; concrete subclasses must override it to be instantiable."""
+        pass
+# --- Database Manager Implementation ---
+class DatabaseManager:
+    """Handles all database operations including creation, connection, and CRUD operations."""
+    def __init__(self, db_path: str = None):
+        if db_path is None:
+            if platform.system() == 'Windows':
+                base_dir = os.path.join(os.environ['APPDATA'], 'FileStorageApp')
+            elif platform.system() == 'Darwin':
+                base_dir = os.path.join(os.path.expanduser('~'), 'Library', 'Application Support', 'FileStorageApp')
+            else:
+                base_dir = os.path.join(os.path.expanduser('~'), '.filestorage')
+            os.makedirs(base_dir, exist_ok=True)
+            self.db_path = os.path.join(base_dir, 'file_storage.db')
+        else:
+            self.db_path = db_path
+        self.conn = None
+        self.cursor = None
+        self.connect()
+        self.create_tables()
+    def connect(self) -> None:
+        """Establish a connection to the SQLite database."""
+        try:
+            self.conn = sqlite3.connect(self.db_path)
+            self.conn.execute("PRAGMA foreign_keys = ON")
+            self.cursor = self.conn.cursor()
+        except sqlite3.Error as e:
+            logging.error(f"Database connection error: {e}")
+            raise
+    def create_tables(self) -> None:
+        """Create necessary tables if they don't exist."""
+        tables = [
+            '''CREATE TABLE IF NOT EXISTS files (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                filename TEXT NOT NULL,
+                original_filename TEXT NOT NULL,
+                file_path TEXT NOT NULL,
+                file_size INTEGER NOT NULL,
+                file_type TEXT,
+                upload_date DATETIME DEFAULT CURRENT_TIMESTAMP
+            )''',
+            '''CREATE TABLE IF NOT EXISTS metadata (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                file_id INTEGER NOT NULL,
+                key TEXT NOT NULL,
+                value TEXT,
+                FOREIGN KEY (file_id) REFERENCES files (id) ON DELETE CASCADE
+            )''',
+            '''CREATE TABLE IF NOT EXISTS chunks (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                file_id INTEGER NOT NULL,
+                chunk_index INTEGER NOT NULL,
+                chunk_text TEXT NOT NULL,
+                chunk_size INTEGER NOT NULL,
+                FOREIGN KEY (file_id) REFERENCES files (id) ON DELETE CASCADE
+            )'''
+        ]
+        try:
+            for table in tables:
+                self.cursor.execute(table)
+            self.conn.commit()
+        except sqlite3.Error as e:
+            self.conn.rollback()
+            logging.error(f"Error creating tables: {e}")
+            raise
+    def insert_file(self, file_data: Dict[str, Any]) -> int:
+        """Insert file information into the database."""
+        try:
+            self.cursor.execute('''
+                INSERT INTO files (filename, original_filename, file_path, file_size, file_type)
+                VALUES (?, ?, ?, ?, ?)
+            ''', (file_data['filename'], file_data['original_filename'],
+                 file_data['file_path'], file_data['file_size'], file_data['file_type']))
+            self.conn.commit()
+            return self.cursor.lastrowid
+        except sqlite3.Error as e:
+            self.conn.rollback()
+            logging.error(f"Error inserting file: {e}")
+            raise
+    def insert_metadata(self, file_id: int, metadata: Dict[str, str]) -> None:
+        """Insert metadata for a specific file."""
+        try:
+            for key, value in metadata.items():
+                self.cursor.execute('''
+                    INSERT INTO metadata (file_id, key, value)
+                    VALUES (?, ?, ?)
+                ''', (file_id, key, value))
+            self.conn.commit()
+        except sqlite3.Error as e:
+            self.conn.rollback()
+            logging.error(f"Error inserting metadata: {e}")
+            raise
+    def insert_chunk(self, file_id: int, chunk_index: int, chunk_text: str) -> None:
+        """Insert a text chunk into the database."""
+        try:
+            chunk_size = len(chunk_text.split())
+            self.cursor.execute('''
+                INSERT INTO chunks (file_id, chunk_index, chunk_text, chunk_size)
+                VALUES (?, ?, ?, ?)
+            ''', (file_id, chunk_index, chunk_text, chunk_size))
+            self.conn.commit()
+        except sqlite3.Error as e:
+            self.conn.rollback()
+            logging.error(f"Error inserting chunk: {e}")
+            raise
+    def log_error(self, error_data: Dict[str, str]) -> None:
+        """Log errors to the database."""
+        try:
+            self.cursor.execute('''
+                INSERT INTO metadata (file_id, key, value)
+                VALUES (?, ?, ?)
+            ''', (-1, 'error', str(error_data)))
+            self.conn.commit()
+        except sqlite3.Error as e:
+            logging.error(f"Error logging error: {e}")
+    def close(self) -> None:
+        """Close the database connection."""
+        if self.conn:
+            self.conn.close()
+# --- File Processor Implementation ---
+class FileProcessor:
+    """Handles file uploads, storage, and metadata extraction."""
+    def __init__(self, upload_folder: str = None):
+        self.upload_folder = upload_folder or os.path.join(Path.home(), 'FileUploads')
+        os.makedirs(self.upload_folder, exist_ok=True)
+    def save_file(self, file: Any) -> Dict[str, Any]:
+        """Save the uploaded file and extract metadata."""
+        filename = f"{uuid.uuid4()}_{file.name}"
+        file_path = os.path.join(self.upload_folder, filename)
+        try:
+            with open(file_path, "wb") as f:
+                f.write(file.read())
+            return {
+                'filename': filename,
+                'original_filename': file.name,
+                'file_path': file_path,
+                'file_size': os.path.getsize(file_path),
+                'file_type': file.name.split('.')[-1] if '.' in file.name else 'unknown'
+            }
+        except Exception as e:
+            logging.error(f"Error saving file: {e}")
+            raise
+    def extract_content(self, file_path: str) -> str:
+        """Extract text content from a file."""
+        try:
+            with open(file_path, 'r', encoding='utf-8') as f:
+                return f.read()
+        except Exception as e:
+            logging.error(f"Error extracting content: {e}")
+            raise
+# --- Text Chunker Implementation ---
+class TextChunker:
+    """Splits text content into manageable chunks."""
+    def __init__(self, chunk_size: int = 500, overlap: int = 50):
+        self.chunk_size = chunk_size
+        self.overlap = overlap
+    def chunk_text(self, text: str) -> List[str]:
+        """Split text into chunks with overlap."""
+        words = text.split()
+        chunks = []
+        start = 0
+        while start < len(words):
+            end = start + self.chunk_size
+            chunks.append(' '.join(words[start:end]))
+            start = end - self.overlap
+        return chunks
+# --- Command Handler Implementation ---
+class CommandHandler:
+    """Manages command execution."""
+    def __init__(self):
+        self.commands = {}
+    def register_command(self, name: str, command: Command):
+        self.commands[name] = command
+    def execute_command(self, name: str) -> bool:
+        if name in self.commands:
+            self.commands[name].execute()
+            return True
+        logging.warning(f"Command '{name}' not found.")
+        return False
+# --- Main Application Implementation ---
+class Application(Interface):
+    """Core application class."""
+    def __init__(self):
+        self.db_manager = DatabaseManager()
+        self.file_processor = FileProcessor()
+        self.text_chunker = TextChunker(chunk_size=512, overlap=50)
+        self.command_handler = CommandHandler()
+        self.processed_data = None
+    def run(self, uploaded_file: Any) -> None:
+        """Main processing pipeline."""
+        try:
+            if not uploaded_file:
+                raise ValueError("No file provided for processing")
+            # Process file
+            file_info = self.file_processor.save_file(uploaded_file)
+            file_id = self.db_manager.insert_file(file_info)
+            # Extract and chunk content
+            raw_content = self.file_processor.extract_content(file_info['file_path'])
+            chunks = self.text_chunker.chunk_text(raw_content)
+            # Store chunks and metadata
+            self.db_manager.insert_metadata(file_id, {
+                'source': 'upload',
+                'processed_at': datetime.datetime.now().isoformat()
+            })
+            for idx, chunk in enumerate(chunks):
+                self.db_manager.insert_chunk(file_id, idx+1, chunk)
+            self.processed_data = {
+                'filename': uploaded_file.name,
+                'chunk_count': len(chunks),
+                'status': 'processed'
+            }
+        except Exception as e:
+            self._handle_error(e)
+            raise
+    def _handle_error(self, error: Exception) -> None:
+        """Centralized error handling."""
+        error_data = {
+            'timestamp': datetime.datetime.now().isoformat(),
+            'error_type': type(error).__name__,
+            'message': str(error),
+            'stack_trace': traceback.format_exc()
+        }
+        self.db_manager.log_error(error_data)
+        self.processed_data = {'status': 'failed'}
+# --- Gradio Interface Implementation ---
+class DataDeityInterface:
+    def __init__(self, app):
+        self.app = app
+        self._setup_theme()
+    def _setup_theme(self):
+        self.theme = gr.themes.Default(
+            primary_hue="emerald",
+            secondary_hue="teal",
+            font=[gr.themes.GoogleFont("Fira Code"), "Arial", "sans-serif"]
+        )
+    def _file_upload_tab(self):
+        with gr.Tab("📤 Upload & Process"):
+            with gr.Row():
+                file_input = gr.File(label="Drag files here", file_count="multiple")
+                stats_output = gr.JSON(label="Processing Stats")
+            with gr.Row():
+                process_btn = gr.Button("⚡ Process Files", variant="primary")
+                clear_btn = gr.Button("🧹 Clear Cache")
+            file_output = gr.Dataframe(label="File Contents Preview")
+            process_btn.click(
+                self.process_file,
+                inputs=file_input,
+                outputs=[stats_output, file_output]
+            )
+            clear_btn.click(lambda: None, outputs=[file_input, stats_output, file_output])
+        return file_input
+    def _data_explorer_tab(self):
+        with gr.Tab("🔍 Data Explorer"):
+            with gr.Row():
+                refresh_btn = gr.Button("🔄 Refresh Data", variant="secondary")
+                search_bar = gr.Textbox(placeholder="Search across all data...")
+            with gr.Tabs():
+                with gr.Tab("Database View"):
+                    files_table = gr.Dataframe(label="Stored Files")
+                    metadata_table = gr.Dataframe(label="File Metadata")
+                    chunks_table = gr.Dataframe(label="Text Chunks")
+                with gr.Tab("Analytics View"):
+                    stats_plot = gr.Plot(label="Data Distribution")
+                    correlations = gr.Matrix(label="Data Correlations")
+            refresh_btn.click(
+                self.refresh_data,
+                outputs=[files_table, metadata_table, chunks_table]
+            )
+    def _command_interface_tab(self):
+        with gr.Tab("💻 Command Console"):
+            cmd_input = gr.Textbox(
+                placeholder="Enter data command...",
+                lines=3,
+                max_lines=10
+            )
+            with gr.Row():
+                execute_btn = gr.Button("🚀 Execute", variant="primary")
+                cmd_history_btn = gr.Button("🕒 History")
+            cmd_output = gr.JSON(label="Command Results")
+            cmd_explain = gr.Markdown("### Command Explanation")
+            execute_btn.click(
+                self.execute_command,
+                inputs=cmd_input,
+                outputs=[cmd_output, cmd_explain]
+            )
+    def create_interface(self):
+        with gr.Blocks(theme=self.theme, title="Data Deity") as interface:
+            gr.Markdown("# 🧠 Data Deity - Ultimate Data Omnipotence Interface")
+            with gr.Tabs():
+                file_input = self._file_upload_tab()
+                self._data_explorer_tab()
+                self._command_interface_tab()
+        return interface
+    def process_file(self, files):
+        try:
+            processed_files = []
+            for file in files:
+                self.app.run(file)
+                processed_files.append({
+                    "filename": file.name,
+                    "chunks": self.app.processed_data['chunk_count'],
+                    "status": "processed",
+                    "timestamp": datetime.datetime.now().isoformat()
+                })
+            stats = {
+                "total_files": len(processed_files),
+                "total_chunks": sum(f['chunks'] for f in processed_files),
+                "average_size": f"{sum(f.size for f in files)/1024/1024:.2f}MB"
+            }
+            preview = pd.DataFrame({
+                "File": [f.name for f in files],
+                "Type": [f.name.split('.')[-1] for f in files],
+                "Status": ["✅ Processed"]*len(files)
+            })
+            return stats, preview
+        except Exception as e:
+            return {"error": str(e)}, pd.DataFrame()
+    def refresh_data(self):
+        try:
+            files = self.app.db_manager.cursor.execute("SELECT * FROM files").fetchall()
+            metadata = self.app.db_manager.cursor.execute("SELECT * FROM metadata").fetchall()
+            chunks = self.app.db_manager.cursor.execute("SELECT * FROM chunks").fetchall()
+            files_df = pd.DataFrame(files, columns=["ID", "Filename", "Original", "Path", "Size", "Type", "Uploaded"])
+            metadata_df = pd.DataFrame(metadata, columns=["ID", "File ID", "Key", "Value"])
+            chunks_df = pd.DataFrame(chunks, columns=["ID", "File ID", "Index", "Text", "Size"])
+            return files_df, metadata_df, chunks_df
+        except Exception as e:
+            return pd.DataFrame(), pd.DataFrame(), pd.DataFrame()
+    def execute_command(self, command):
+        try:
+            if "list files" in command.lower():
+                files = self.app.db_manager.cursor.execute("SELECT filename, file_type, upload_date FROM files").fetchall()
+                return {"result": files}, "### File Listing Command\nRetrieved all stored files from database."
+            elif "search" in command.lower():
+                term = command.split("search")[1].strip()
+                results = self.app.db_manager.cursor.execute(
+                    "SELECT chunk_text FROM chunks WHERE chunk_text LIKE ?",
+                    (f"%{term}%",)
+                ).fetchall()
+                return {"matches": [r[0] for r in results]}, f"### Search Results\nFound {len(results)} matches for '{term}'"
+            else:
+                return {"error": "Command not recognized"}, "### Unrecognized Command\nTry 'list files' or 'search <term>'"
+        except Exception as e:
+            return {"error": str(e)}, "### Command Execution Failed"
+# --- Main Execution ---
+if __name__ == "__main__":
+    logging.basicConfig(
+        level=logging.INFO,
+        format='%(asctime)s - %(levelname)s - %(message)s'
+    )
+    try:
+        app = Application()
+        interface = DataDeityInterface(app)
+        interface.create_interface().launch(
+            server_name="0.0.0.0",
+            server_port=7860,
+            share=True
+        )
+    except KeyboardInterrupt:
+        logging.info("\nApplication shutdown requested")
+    finally:
+        app.db_manager.close()

Dockerfile ADDED Viewed

	@@ -0,0 +1,25 @@

+# Use the official Python image from the Docker Hub
+FROM python:3.9-slim
+# Set the working directory in the container (absolute, so it does not depend on
+# whatever working directory the base image happens to define)
+WORKDIR /app
+# Copy the requirements.txt file into the container
+COPY requirements.txt .
+RUN mkdir -p /home/user/nltk_data && chmod a+rwx /home/user/nltk_data
+# Install the required packages
+RUN pip install --no-cache-dir -r requirements.txt
+# Install additional packages if needed (no pip cache, to keep the layer small)
+RUN pip install --no-cache-dir matplotlib
+# Copy the rest of your application code into the container
+COPY . .
+# Download NLTK resources
+RUN python -m nltk.downloader punkt vader_lexicon stopwords
+# Command to run your application
+CMD ["python", "app.py"]

README.md ADDED Viewed

	@@ -0,0 +1,12 @@

+---
+title: Dbgod
+emoji: 🌍
+colorFrom: red
+colorTo: green
+sdk: gradio
+sdk_version: 5.32.0
+app_file: app.py
+pinned: true
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,960 @@

+import gradio as gr
+import pandas as pd
+import os
+import json
+import sqlite3
+import tempfile
+import nltk
+import traceback
+import datetime
+import time
+import numpy as np
+import matplotlib.pyplot as plt
+import io
+import base64
+import requests
+import re
+from pathlib import Path
+from nltk.sentiment import SentimentIntensityAnalyzer
+from nltk.tokenize import word_tokenize
+from nltk.corpus import stopwords
+from sklearn.model_selection import train_test_split
+from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
+from sklearn.linear_model import LinearRegression
+from sklearn.cluster import KMeans
+from sklearn.preprocessing import StandardScaler
+from sklearn.metrics import mean_squared_error, r2_score, accuracy_score, classification_report
+from sklearn.feature_extraction.text import TfidfVectorizer
+import pymongo
+import redis
+import pymysql  # Using pymysql instead of mysql.connector
+import psycopg2
+from bs4 import BeautifulSoup
+def setup_nltk():
+    try:
+        # Use a temporary directory for NLTK data
+        nltk_data_dir = os.path.join(tempfile.gettempdir(), 'nltk_data')
+        os.makedirs(nltk_data_dir, exist_ok=True)
+        nltk.data.path.append(nltk_data_dir)
+        # Download necessary NLTK data
+        nltk_resources = ['punkt', 'stopwords', 'vader_lexicon']
+        for resource in nltk_resources:
+            try:
+                nltk.data.find(f'tokenizers/{resource}' if resource == 'punkt'
+                              else f'corpora/{resource}' if resource == 'stopwords'
+                              else f'sentiment/{resource}')
+            except LookupError:
+                nltk.download(resource, download_dir=nltk_data_dir, quiet=True)
+        return True
+    except Exception as e:
+        print(f"Error setting up NLTK: {e}")
+        return False
+# Initialize NLTK
+if not setup_nltk():
+    print("Failed to set up NLTK. Some NLP features may not work properly.")
+class DatabaseManager:
+    def __init__(self, db_path=None):
+        try:
+            # Use a temporary directory for the database
+            if db_path is None:
+                db_dir = os.path.join(tempfile.gettempdir(), 'data')
+                os.makedirs(db_dir, exist_ok=True)
+                db_path = os.path.join(db_dir, 'data_deity.db')
+            self.db_path = db_path
+            self.connection = sqlite3.connect(db_path)
+            self.cursor = self.connection.cursor()
+            self._create_tables()
+            print(f"Successfully initialized database at {db_path}")
+        except sqlite3.Error as e:
+            print(f"Failed to initialize database: {e}")
+            # Fallback to in-memory database if file-based DB fails
+            try:
+                print("Trying in-memory database as fallback...")
+                self.db_path = ":memory:"
+                self.connection = sqlite3.connect(":memory:")
+                self.cursor = self.connection.cursor()
+                self._create_tables()
+                print("Successfully initialized in-memory database")
+            except sqlite3.Error as e2:
+                print(f"Failed to initialize in-memory database: {e2}")
+                raise
+    def _create_tables(self):
+        try:
+            self.cursor.execute('''
+                CREATE TABLE IF NOT EXISTS files (
+                    id INTEGER PRIMARY KEY,
+                    filename TEXT,
+                    original TEXT,
+                    path TEXT,
+                    size INTEGER,
+                    file_type TEXT,
+                    upload_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+                )
+            ''')
+            self.cursor.execute('''
+                CREATE TABLE IF NOT EXISTS metadata (
+                    id INTEGER PRIMARY KEY,
+                    file_id INTEGER,
+                    meta_key TEXT,
+                    meta_value TEXT,
+                    FOREIGN KEY (file_id) REFERENCES files (id)
+                )
+            ''')
+            self.cursor.execute('''
+                CREATE TABLE IF NOT EXISTS chunks (
+                    id INTEGER PRIMARY KEY,
+                    file_id INTEGER,
+                    chunk_index INTEGER,
+                    chunk_text TEXT,
+                    chunk_size INTEGER,
+                    FOREIGN KEY (file_id) REFERENCES files (id)
+                )
+            ''')
+            self.cursor.execute('''
+                CREATE TABLE IF NOT EXISTS insights (
+                    id INTEGER PRIMARY KEY,
+                    file_id INTEGER,
+                    insight_type TEXT,
+                    insight_text TEXT,
+                    confidence REAL,
+                    timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                    is_speculative BOOLEAN,
+                    FOREIGN KEY (file_id) REFERENCES files (id)
+                )
+            ''')
+            self.cursor.execute('''
+                CREATE TABLE IF NOT EXISTS analytics (
+                    id INTEGER PRIMARY KEY,
+                    file_id INTEGER,
+                    analysis_type TEXT,
+                    analysis_result TEXT,
+                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                    FOREIGN KEY (file_id) REFERENCES files (id)
+                )
+            ''')
+            self.connection.commit()
+            print("Successfully created database tables")
+        except sqlite3.Error as e:
+            print(f"Error creating tables: {e}")
+            raise
+    def add_file(self, filename, original, path, size, file_type):
+        try:
+            self.cursor.execute('''
+                INSERT INTO files (filename, original, path, size, file_type)
+                VALUES (?, ?, ?, ?, ?)
+            ''', (filename, original, path, size, file_type))
+            self.connection.commit()
+            return self.cursor.lastrowid
+        except sqlite3.Error as e:
+            print(f"Database Error in add_file: {e}")
+            self.connection.rollback()
+            return None
+    def add_metadata(self, file_id, meta_key, meta_value):
+        try:
+            self.cursor.execute('''
+                INSERT INTO metadata (file_id, meta_key, meta_value)
+                VALUES (?, ?, ?)
+            ''', (file_id, meta_key, meta_value))
+            self.connection.commit()
+        except sqlite3.Error as e:
+            print(f"Database Error in add_metadata: {e}")
+            self.connection.rollback()
+    def add_chunk(self, file_id, chunk_index, chunk_text, chunk_size):
+        try:
+            self.cursor.execute('''
+                INSERT INTO chunks (file_id, chunk_index, chunk_text, chunk_size)
+                VALUES (?, ?, ?, ?)
+            ''', (file_id, chunk_index, chunk_text, chunk_size))
+            self.connection.commit()
+        except sqlite3.Error as e:
+            print(f"Database Error in add_chunk: {e}")
+            self.connection.rollback()
+    def add_insight(self, file_id, insight_type, insight_text, confidence, is_speculative):
+        try:
+            self.cursor.execute('''
+                INSERT INTO insights (file_id, insight_type, insight_text, confidence, is_speculative)
+                VALUES (?, ?, ?, ?, ?)
+            ''', (file_id, insight_type, insight_text, confidence, is_speculative))
+            self.connection.commit()
+        except sqlite3.Error as e:
+            print(f"Database Error in add_insight: {e}")
+            self.connection.rollback()
+    def add_analysis(self, file_id, analysis_type, analysis_result):
+        try:
+            self.cursor.execute('''
+                INSERT INTO analytics (file_id, analysis_type, analysis_result)
+                VALUES (?, ?, ?)
+            ''', (file_id, analysis_type, analysis_result))
+            self.connection.commit()
+        except sqlite3.Error as e:
+            print(f"Database Error in add_analysis: {e}")
+            self.connection.rollback()
+    def get_file_by_id(self, file_id):
+        try:
+            self.cursor.execute('''
+                SELECT * FROM files WHERE id = ?
+            ''', (file_id,))
+            return self.cursor.fetchone()
+        except sqlite3.Error as e:
+            print(f"Database Error in get_file_by_id: {e}")
+            return None
+    def get_analysis_by_file_id(self, file_id):
+        try:
+            self.cursor.execute('''
+                SELECT analysis_type, analysis_result
+                FROM analytics
+                WHERE file_id = ?
+            ''', (file_id,))
+            return self.cursor.fetchall()
+        except sqlite3.Error as e:
+            print(f"Database Error in get_analysis_by_file_id: {e}")
+            return []
+    def get_insights_by_file_id(self, file_id):
+        try:
+            self.cursor.execute('''
+                SELECT insight_type, insight_text, confidence
+                FROM insights
+                WHERE file_id = ?
+            ''', (file_id,))
+            return self.cursor.fetchall()
+        except sqlite3.Error as e:
+            print(f"Database Error in get_insights_by_file_id: {e}")
+            return []
+    def close(self):
+        if hasattr(self, 'connection') and self.connection:
+            self.connection.close()
+class FileProcessor:
+    def __init__(self, db_manager):
+        self.db_manager = db_manager
+        self.sia = SentimentIntensityAnalyzer()
+    def process_file(self, file):
+        try:
+            # Write the file content to a temporary file
+            temp_dir = tempfile.mkdtemp()
+            file_path = os.path.join(temp_dir, os.path.basename(file.name))
+            import shutil
+            shutil.copy(file.name, file_path)
+            file_size = os.path.getsize(file_path)
+            file_extension = os.path.splitext(file.name)[1].lower()
+            if file_extension == '.txt':
+                file_type = 'text'
+            elif file_extension == '.csv':
+                file_type = 'csv'
+            elif file_extension == '.json':
+                file_type = 'json'
+            else:
+                raise ValueError(f"Unsupported file type: {file_extension}")
+            file_id = self.db_manager.add_file(
+                filename=os.path.basename(file.name),
+                original=os.path.basename(file.name),
+                path=file_path,
+                size=file_size,
+                file_type=file_type
+            )
+            if not file_id:
+                raise Exception("Failed to add file to database")
+            chunk_count = 0
+            if file_type == 'text':
+                chunk_count = self._process_text_file(file_path, file_id)
+            elif file_type == 'csv':
+                chunk_count = self._process_csv_file(file_path, file_id)
+            elif file_type == 'json':
+                chunk_count = self._process_json_file(file_path, file_id)
+            return file_id, chunk_count
+        except Exception as e:
+            print(f"Error processing file: {e}")
+            print(traceback.format_exc())
+            raise
+    def _process_text_file(self, file_path, file_id):
+        """Store chunks, counts, sentiment and token statistics for a text file.
+
+        The file is read as UTF-8 and split on double newlines; every piece
+        that is not blank is stored as a chunk under its position in the split.
+        Character/word counts are stored as metadata, the sentiment scores and
+        a token summary as analyses, and an insight is added when the compound
+        sentiment is above 0.5 or below -0.5.
+
+        Args:
+            file_path: Path of the text file to read.
+            file_id: Database id that every stored record is attached to.
+
+        Returns:
+            int: Number of chunks actually written with ``add_chunk``.
+
+        Raises:
+            Exception: Any error is printed with its traceback and re-raised.
+        """
+        try:
+            with open(file_path, 'r', encoding='utf-8') as f:
+                text = f.read()
+            self.db_manager.add_metadata(file_id, 'character_count', str(len(text)))
+            self.db_manager.add_metadata(file_id, 'word_count', str(len(text.split())))
+            # Count only what is stored: blank pieces (for example after a
+            # trailing blank line) are skipped, so the length of the split
+            # would over-report the number of chunks.
+            stored_chunks = 0
+            for i, chunk in enumerate(text.split('\n\n')):
+                if chunk.strip():
+                    self.db_manager.add_chunk(file_id, i, chunk, len(chunk))
+                    stored_chunks += 1
+            sentiment = self.sia.polarity_scores(text)
+            self.db_manager.add_analysis(file_id, 'sentiment_analysis', json.dumps(sentiment))
+            tokens = word_tokenize(text)
+            stop_words = set(stopwords.words('english'))
+            filtered_tokens = [word for word in tokens if word.lower() not in stop_words]
+            token_analysis = {
+                'total_tokens': len(tokens),
+                'unique_tokens': len(set(tokens)),
+                'tokens_without_stopwords': len(filtered_tokens),
+                # A slice already copes with lists shorter than 20.
+                'sample_tokens': filtered_tokens[:20]
+            }
+            self.db_manager.add_analysis(file_id, 'tokenization', json.dumps(token_analysis))
+            if sentiment['compound'] > 0.5:
+                self.db_manager.add_insight(
+                    file_id, 'sentiment', 'Text has a very positive tone',
+                    sentiment['compound'], False
+                )
+            elif sentiment['compound'] < -0.5:
+                # The stored confidence is the magnitude of the score.
+                self.db_manager.add_insight(
+                    file_id, 'sentiment', 'Text has a very negative tone',
+                    abs(sentiment['compound']), False
+                )
+            return stored_chunks
+        except Exception as e:
+            print(f"Error processing text file: {e}")
+            print(traceback.format_exc())
+            raise
+    def _process_csv_file(self, file_path, file_id):
+        """Record metadata, row chunks, summary statistics and a simple model for a CSV file.
+
+        Rows are stored as JSON chunks of 100. When numeric columns exist, their
+        ``describe()`` summary is stored. With at least two numeric columns and
+        20 rows, a random-forest regression of the first numeric column on the
+        remaining ones is fitted as well; a failure in that step is only printed.
+
+        Args:
+            file_path: Path of the CSV file to read.
+            file_id: Database id that every stored record is attached to.
+
+        Returns:
+            int: Number of row chunks stored.
+
+        Raises:
+            Exception: Errors outside the modelling step are printed and re-raised.
+        """
+        try:
+            frame = pd.read_csv(file_path)
+            row_total = len(frame)
+            self.db_manager.add_metadata(file_id, 'row_count', str(row_total))
+            self.db_manager.add_metadata(file_id, 'column_count', str(len(frame.columns)))
+            self.db_manager.add_metadata(file_id, 'columns', ','.join(frame.columns))
+            rows_per_chunk = 100
+            chunk_total = 0
+            for start in range(0, row_total, rows_per_chunk):
+                payload = frame[start:start + rows_per_chunk].to_json(orient='records')
+                self.db_manager.add_chunk(file_id, chunk_total, payload, len(payload))
+                chunk_total += 1
+            numeric_cols = frame.select_dtypes(include=['number']).columns
+            if len(numeric_cols) == 0:
+                return chunk_total
+            summary = frame[numeric_cols].describe().to_json()
+            self.db_manager.add_analysis(file_id, 'statistical_analysis', summary)
+            # The model needs at least one feature column and 20 rows.
+            if len(numeric_cols) < 2 or row_total < 20:
+                return chunk_total
+            try:
+                target = numeric_cols[0]
+                predictors = [name for name in numeric_cols if name != target]
+                features_train, features_test, target_train, target_test = train_test_split(
+                    frame[predictors], frame[target], test_size=0.2, random_state=42
+                )
+                forest = RandomForestRegressor(n_estimators=50, random_state=42)
+                forest.fit(features_train, target_train)
+                predicted = forest.predict(features_test)
+                mse = mean_squared_error(target_test, predicted)
+                r2 = r2_score(target_test, predicted)
+                report = {
+                    'target_column': target,
+                    'feature_columns': predictors,
+                    'mean_squared_error': mse,
+                    'r2_score': r2,
+                    'feature_importance': dict(zip(predictors, forest.feature_importances_))
+                }
+                self.db_manager.add_analysis(file_id, 'predictive_model', json.dumps(report))
+                if r2 > 0.7:
+                    message = f'Strong predictive relationship found between {target} and other variables'
+                elif r2 > 0.3:
+                    message = f'Moderate predictive relationship found between {target} and other variables'
+                else:
+                    message = None
+                if message is not None:
+                    self.db_manager.add_insight(file_id, 'prediction', message, r2, False)
+            except Exception as e:
+                # Best effort: the statistics and chunks above are kept.
+                print(f"Could not create predictive model: {e}")
+            return chunk_total
+        except Exception as e:
+            print(f"Error processing CSV file: {e}")
+            print(traceback.format_exc())
+            raise
+    def _process_json_file(self, file_path, file_id):
+        """Store metadata, chunks and a structure outline for a JSON file.
+
+        A top-level array is recorded with its item count and stored in chunks
+        of 10 items. A top-level object is recorded with its key count and key
+        names and stored as a single chunk. Any other top-level value is stored
+        as a single chunk without metadata. The outline produced by
+        ``_analyze_json_structure`` is stored as the ``structure_analysis``.
+
+        Args:
+            file_path: Path of the JSON file to read (UTF-8).
+            file_id: Database id that every stored record is attached to.
+
+        Returns:
+            int: Number of chunks stored.
+
+        Raises:
+            Exception: Any error is printed with its traceback and re-raised.
+        """
+        try:
+            with open(file_path, 'r', encoding='utf-8') as f:
+                data = json.load(f)
+            if isinstance(data, list):
+                self.db_manager.add_metadata(file_id, 'item_count', str(len(data)))
+                self.db_manager.add_metadata(file_id, 'structure', 'array')
+                chunk_size = 10
+                chunks = [data[i:i + chunk_size] for i in range(0, len(data), chunk_size)]
+            else:
+                if isinstance(data, dict):
+                    self.db_manager.add_metadata(file_id, 'key_count', str(len(data.keys())))
+                    self.db_manager.add_metadata(file_id, 'structure', 'object')
+                    self.db_manager.add_metadata(file_id, 'keys', ','.join(data.keys()))
+                # Objects and scalars are kept whole as one chunk.
+                chunks = [data]
+            for i, chunk in enumerate(chunks):
+                chunk_text = json.dumps(chunk)
+                self.db_manager.add_chunk(file_id, i, chunk_text, len(chunk_text))
+            structure_analysis = self._analyze_json_structure(data)
+            self.db_manager.add_analysis(file_id, 'structure_analysis', json.dumps(structure_analysis))
+            return len(chunks)
+        except Exception as e:
+            print(f"Error processing JSON file: {e}")
+            print(traceback.format_exc())
+            raise
+    def _analyze_json_structure(self, data, max_depth=3, current_depth=0):
+        """Return a compact outline of *data* with values replaced by type names.
+
+        Objects keep their first 10 keys (plus a ``"..."`` entry counting the
+        rest), arrays longer than 5 items are summarised by their first item
+        and a total, and anything at ``max_depth`` or deeper becomes ``"..."``.
+
+        Args:
+            data: Parsed JSON value to outline.
+            max_depth: Nesting level at which the outline is cut off.
+            current_depth: Nesting level of *data*; used by the recursion.
+
+        Returns:
+            A dict, list or string mirroring the shape of *data*.
+        """
+        if current_depth >= max_depth:
+            return "..."
+        next_depth = current_depth + 1
+        if isinstance(data, dict):
+            outline = {
+                key: self._analyze_json_structure(value, max_depth, next_depth)
+                for key, value in list(data.items())[:10]
+            }
+            hidden = len(data) - 10
+            if hidden > 0:
+                outline["..."] = f"{hidden} more keys"
+            return outline
+        if isinstance(data, list):
+            size = len(data)
+            if size == 0:
+                return []
+            if size <= 5:
+                return [
+                    self._analyze_json_structure(entry, max_depth, next_depth)
+                    for entry in data
+                ]
+            # Long arrays: outline the first item only and report the total.
+            first = self._analyze_json_structure(data[0], max_depth, next_depth)
+            return [first, "...", f"{size} items total"]
+        return type(data).__name__
+class DataDeityApp:
+    """Process uploaded files and render their stored analyses as HTML.
+
+    ``run`` hands an upload to the file processor and remembers the resulting
+    database id under the upload's ``name``. ``get_analysis_results`` and
+    ``generate_report`` turn the analyses and insights stored for such an id
+    into HTML fragments or a standalone report file. Text taken from the
+    uploaded data (file names, column names, tokens, insight text) is
+    HTML-escaped before it is embedded.
+    """
+    # Rows of the statistics table, in display order.
+    _STAT_METRICS = ('count', 'mean', 'std', 'min', '25%', '50%', '75%', 'max')
+    def __init__(self):
+        self.db_manager = DatabaseManager()
+        self.file_processor = FileProcessor(self.db_manager)
+        # Maps an upload's ``name`` to the file id returned by process_file.
+        self.processed_data = {}
+    def run(self, file):
+        """Process *file* and return its chunk count, or 0 if processing failed.
+
+        Failures are printed with a traceback instead of being raised.
+        """
+        try:
+            file_id, chunk_count = self.file_processor.process_file(file)
+            self.processed_data[file.name] = file_id
+            return chunk_count
+        except Exception as e:
+            print(f"Error in app.run: {e}")
+            print(traceback.format_exc())
+            return 0
+    @staticmethod
+    def _esc(value):
+        """Return ``str(value)`` escaped for safe inclusion in HTML."""
+        # Imported here so the class does not rely on the module's import list.
+        from html import escape
+        return escape(str(value))
+    @classmethod
+    def _format_stat_value(cls, value):
+        """Format one statistics cell: floats with 4 decimals, missing values as N/A."""
+        if value is None:
+            return "N/A"
+        if isinstance(value, float):
+            # A conditional cannot live inside a format spec, so the float
+            # check has to happen before formatting.
+            return f"{value:.4f}"
+        return cls._esc(value)
+    @classmethod
+    def _stats_table_html(cls, stats):
+        """Render ``{column: {metric: value}}`` statistics as an HTML table."""
+        columns = list(stats)
+        header_cells = "".join(f"<th>{cls._esc(col)}</th>" for col in columns)
+        rows = []
+        for metric in cls._STAT_METRICS:
+            cells = "".join(
+                f"<td>{cls._format_stat_value(stats[col].get(metric))}</td>"
+                for col in columns
+            )
+            rows.append(f"<tr><td>{metric}</td>{cells}</tr>")
+        body = "".join(rows)
+        return f"<table><tr><th>Statistic</th>{header_cells}</tr>{body}</table>"
+    @staticmethod
+    def _sentiment_html(sentiment):
+        """Render the sentiment scores and a red/grey/green proportion bar."""
+        neg_stop = sentiment['neg'] * 100
+        neu_stop = (sentiment['neg'] + sentiment['neu']) * 100
+        return f"""
+            <p><strong>Compound Score:</strong> {sentiment['compound']:.4f}</p>
+            <p><strong>Positive:</strong> {sentiment['pos']:.4f}</p>
+            <p><strong>Neutral:</strong> {sentiment['neu']:.4f}</p>
+            <p><strong>Negative:</strong> {sentiment['neg']:.4f}</p>
+            <div class="sentiment-bar" style="background: linear-gradient(to right,
+                #ff4d4d 0%,
+                #ff4d4d {neg_stop}%,
+                #f2f2f2 {neg_stop}%,
+                #f2f2f2 {neu_stop}%,
+                #4dff4d {neu_stop}%,
+                #4dff4d 100%);
+                height: 20px; border-radius: 5px;">
+            </div>
+        """
+    @classmethod
+    def _tokenization_html(cls, token_data):
+        """Render the token counts and the sample token list."""
+        sample = ', '.join(cls._esc(token) for token in token_data['sample_tokens'])
+        return f"""
+            <p><strong>Total Tokens:</strong> {token_data['total_tokens']}</p>
+            <p><strong>Unique Tokens:</strong> {token_data['unique_tokens']}</p>
+            <p><strong>Tokens without Stopwords:</strong> {token_data['tokens_without_stopwords']}</p>
+            <p><strong>Sample Tokens:</strong> {sample}</p>
+        """
+    @classmethod
+    def _model_html(cls, model_data):
+        """Render the model metrics and a bar per feature, most important first."""
+        ranked = sorted(
+            model_data['feature_importance'].items(),
+            key=lambda item: item[1],
+            reverse=True
+        )
+        bars = ''.join(
+            f'<div style="margin-bottom:5px;"><span>{cls._esc(feat)}</span>: '
+            f'<div style="display:inline-block;width:{imp*100}%;background-color:#4CAF50;height:10px;"></div>'
+            f' {imp:.4f}</div>'
+            for feat, imp in ranked
+        )
+        features = ', '.join(cls._esc(col) for col in model_data['feature_columns'])
+        return f"""
+            <p><strong>Target Column:</strong> {cls._esc(model_data['target_column'])}</p>
+            <p><strong>Feature Columns:</strong> {features}</p>
+            <p><strong>Model Performance:</strong></p>
+            <ul>
+                <li>Mean Squared Error: {model_data['mean_squared_error']:.4f}</li>
+                <li>R² Score: {model_data['r2_score']:.4f}</li>
+            </ul>
+            <p><strong>Feature Importance:</strong></p>
+            <div class="feature-importance">
+                {bars}
+            </div>
+        """
+    @classmethod
+    def _structure_html(cls, structure):
+        """Render the JSON structure outline as an indented ``<pre>`` block."""
+        return f"<pre>{cls._esc(json.dumps(structure, indent=2))}</pre>"
+    @classmethod
+    def _insight_items_html(cls, insights):
+        """Render ``(type, text, confidence)`` insights as ``<li>`` items."""
+        return "".join(
+            f"<li><strong>{cls._esc(insight_type.title())}:</strong> "
+            f"{cls._esc(insight_text)} (Confidence: {confidence:.2f})</li>"
+            for insight_type, insight_text, confidence in insights
+        )
+    def _iter_sections(self, file_type, analyses):
+        """Yield ``(title, css_class, inner_html)`` for each analysis that applies to *file_type*.
+
+        Analyses of a type that does not belong to *file_type* are skipped
+        without being parsed.
+        """
+        for analysis_type, analysis_result in analyses:
+            kind = (file_type, analysis_type)
+            if kind == ('text', 'sentiment_analysis'):
+                yield ("Sentiment Analysis", "sentiment-analysis",
+                       self._sentiment_html(json.loads(analysis_result)))
+            elif kind == ('text', 'tokenization'):
+                yield ("Text Tokenization", "tokenization",
+                       self._tokenization_html(json.loads(analysis_result)))
+            elif kind == ('csv', 'statistical_analysis'):
+                yield ("Statistical Analysis", "stats-table",
+                       self._stats_table_html(json.loads(analysis_result)))
+            elif kind == ('csv', 'predictive_model'):
+                yield ("Predictive Model", "predictive-model",
+                       self._model_html(json.loads(analysis_result)))
+            elif kind == ('json', 'structure_analysis'):
+                yield ("JSON Structure", "json-data",
+                       self._structure_html(json.loads(analysis_result)))
+    def get_analysis_results(self, file_id):
+        """Return the stored results for *file_id* as ``{section title: HTML}``.
+
+        Returns ``{"Error": message}`` when the file is unknown or rendering
+        fails; the failure is also printed with its traceback.
+        """
+        try:
+            file_info = self.db_manager.get_file_by_id(file_id)
+            if not file_info:
+                return {"Error": "File not found"}
+            file_type = file_info[5]
+            analyses = self.db_manager.get_analysis_by_file_id(file_id)
+            insights = self.db_manager.get_insights_by_file_id(file_id)
+            results = {}
+            results["File Information"] = f"""
+            <div class="file-info">
+                <p><strong>Filename:</strong> {self._esc(file_info[1])}</p>
+                <p><strong>Size:</strong> {file_info[4]} bytes</p>
+                <p><strong>Type:</strong> {self._esc(file_info[5])}</p>
+            </div>
+            """
+            for title, css_class, inner in self._iter_sections(file_type, analyses):
+                if css_class == "json-data":
+                    # Only the on-screen view carries this label above the outline.
+                    inner = "<p><strong>Structure Overview:</strong></p>" + inner
+                results[title] = f'<div class="{css_class}">{inner}</div>'
+            if insights:
+                items = self._insight_items_html(insights)
+                results["Insights"] = f"<div class='insights'><h4>Key Insights</h4><ul>{items}</ul></div>"
+            return results
+        except Exception as e:
+            print(f"Error getting analysis results: {e}")
+            print(traceback.format_exc())
+            return {"Error": str(e)}
+    def generate_report(self, file_id):
+        """Write a standalone HTML report for *file_id* into ``reports/``.
+
+        The whole document is built before the file is opened, so a rendering
+        error does not leave a truncated report behind.
+
+        Returns:
+            The path of the written report, or None when the file is unknown
+            or an error occurred (the error is printed with its traceback).
+        """
+        # Imported here so the method works however the module imports
+        # (or does not import) these names.
+        import datetime
+        import time
+        try:
+            file_info = self.db_manager.get_file_by_id(file_id)
+            if not file_info:
+                return None
+            filename = file_info[1]
+            file_type = file_info[5]
+            analyses = self.db_manager.get_analysis_by_file_id(file_id)
+            insights = self.db_manager.get_insights_by_file_id(file_id)
+            shown_name = self._esc(filename)
+            parts = [f"""<!DOCTYPE html>
+                <html>
+                <head>
+                    <title>Analysis Report: {shown_name}</title>
+                    <style>
+                        body {{ font-family: Arial, sans-serif; margin: 20px; }}
+                        h1, h2, h3 {{ color: #333; }}
+                        .container {{ max-width: 1200px; margin: 0 auto; }}
+                        .section {{ margin-bottom: 30px; padding: 20px; border: 1px solid #ddd; border-radius: 5px; }}
+                        .file-info {{ background-color: #f9f9f9; padding: 15px; border-radius: 5px; }}
+                        table {{ border-collapse: collapse; width: 100%; }}
+                        th, td {{ border: 1px solid #ddd; padding: 8px; text-align: left; }}
+                        th {{ background-color: #f2f2f2; }}
+                        pre {{ background-color: #f8f9fa; padding: 15px; border-radius: 5px; overflow-x: auto; }}
+                        .sentiment-bar {{ margin-top: 10px; }}
+                        .insights {{ background-color: #f0f7ff; padding: 15px; border-radius: 5px; }}
+                    </style>
+                </head>
+                <body>
+                    <div class="container">
+                        <h1>Analysis Report: {shown_name}</h1>
+                        <div class="section">
+                            <h2>File Information</h2>
+                            <div class="file-info">
+                                <p><strong>Filename:</strong> {shown_name}</p>
+                                <p><strong>Size:</strong> {file_info[4]} bytes</p>
+                                <p><strong>Type:</strong> {self._esc(file_type)}</p>
+                                <p><strong>Upload Date:</strong> {self._esc(file_info[6])}</p>
+                            </div>
+                        </div>
+                """]
+            for title, _css_class, inner in self._iter_sections(file_type, analyses):
+                parts.append(f"""
+                        <div class="section">
+                            <h2>{title}</h2>
+                            {inner}
+                        </div>
+                """)
+            if insights:
+                parts.append(f"""
+                        <div class="section">
+                            <h2>Key Insights</h2>
+                            <div class="insights">
+                                <ul>{self._insight_items_html(insights)}</ul>
+                            </div>
+                        </div>
+                """)
+            generated_at = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+            parts.append(f"""
+                    </div>
+                    <footer style="text-align: center; margin-top: 30px; color: #777;">
+                        <p>Generated on {generated_at}</p>
+                    </footer>
+                </body>
+                </html>
+                """)
+            os.makedirs('reports', exist_ok=True)
+            # basename() keeps the report inside reports/ whatever the stored name is.
+            stem = os.path.splitext(os.path.basename(filename))[0]
+            report_filename = f"report_{stem}_{int(time.time())}.html"
+            report_path = os.path.join('reports', report_filename)
+            with open(report_path, 'w', encoding='utf-8') as f:
+                f.write("".join(parts))
+            return report_path
+        except Exception as e:
+            print(f"Error generating report: {e}")
+            print(traceback.format_exc())
+            return None
+    def cleanup(self):
+        """Close the database manager; a failure is printed, not raised."""
+        try:
+            self.db_manager.close()
+        except Exception as e:
+            print(f"Error during cleanup: {e}")
+def main():
+    """Build the Gradio interface around a DataDeityApp and launch it.
+
+    The page offers a file upload, an "Analyze File" button that shows the
+    analysis as HTML, and a "Download Report" button that returns the path of
+    a generated report for a file that has already been analyzed.
+    """
+    from html import escape
+    app = DataDeityApp()
+    custom_css = """
+    body {
+        font-family: 'Arial', sans-serif;
+    }
+    .analysis-results {
+        max-height: 800px;
+        overflow-y: auto;
+        padding: 15px;
+        border-radius: 5px;
+        border: 1px solid #eee;
+    }
+    .sentiment-analysis, .tokenization, .json-data {
+        margin: 15px 0;
+        padding: 15px;
+        border: 1px solid #eee;
+        border-radius: 5px;
+    }
+    pre {
+        background-color: #f8f9fa;
+        padding: 15px;
+        border-radius: 5px;
+        overflow-x: auto;
+    }
+    .stats-table table {
+        width: 100%;
+        border-collapse: collapse;
+    }
+    .stats-table th, .stats-table td {
+        border: 1px solid #ddd;
+        padding: 8px;
+        text-align: left;
+    }
+    .stats-table th {
+        background-color: #f2f2f2;
+    }
+    .error-message {
+        color: #d9534f;
+        padding: 15px;
+        border: 1px solid #d9534f;
+        border-radius: 5px;
+    }
+    .feature-importance {
+        margin-top: 10px;
+    }
+    .insights {
+        background-color: #f0f7ff;
+        padding: 15px;
+        border-radius: 5px;
+    }
+    """
+    def process_and_display(file):
+        """Analyze the uploaded *file* and return the outcome as an HTML string."""
+        try:
+            if file is None:
+                return """
+                <div class="error-message">
+                    <h2>No File Selected</h2>
+                    <p>Please upload a file to analyze.</p>
+                </div>
+                """
+            chunk_count = app.run(file)
+            file_id = app.processed_data.get(file.name)
+            # The name comes from the upload, so escape it before embedding it.
+            shown_name = escape(str(file.name))
+            if file_id is None:
+                return f"""
+                <div class="error-message">
+                    <h2>Processing Error</h2>
+                    <p>Failed to process file: {shown_name}</p>
+                    <p>Chunks processed: {chunk_count}</p>
+                </div>
+                """
+            analysis_results = app.get_analysis_results(file_id)
+            sections = [f"""
+                <div class="analysis-results">
+                    <h2>Analysis Results for {shown_name}</h2>
+                    <p>Processed {chunk_count} chunks</p>
+                """]
+            for key, value in analysis_results.items():
+                # value is HTML produced by get_analysis_results; insert it as is.
+                sections.append(f"""
+                    <div class="result-section">
+                        <h3>{key}</h3>
+                        {value}
+                    </div>
+                    """)
+            sections.append("</div>")
+            return "".join(sections)
+        except Exception as e:
+            print(f"Error in process_and_display: {e}")
+            print(traceback.format_exc())
+            return f"""
+            <div class="error-message">
+                <h2>Error</h2>
+                <p>An error occurred while processing the file: {escape(str(e))}</p>
+            </div>
+            """
+    def generate_and_download_report(file):
+        """Return the report path for an already analyzed *file*, or None."""
+        try:
+            if file is None:
+                return None
+            file_id = app.processed_data.get(file.name)
+            if file_id is None:
+                return None
+            # generate_report returns None on failure; normalise falsy results.
+            return app.generate_report(file_id) or None
+        except Exception as e:
+            print(f"Error generating report: {e}")
+            print(traceback.format_exc())
+            return None
+    with gr.Blocks(css=custom_css) as demo:
+        gr.Markdown("""
+        # Advanced File Processing & Analysis Application
+        This application provides comprehensive analysis of text, CSV, and JSON files.
+        ### Supported File Types:
+        - Text Files (.txt): Sentiment analysis and text tokenization
+        - CSV Files (.csv): Statistical analysis and predictive modeling
+        - JSON Files (.json): Structure analysis and data exploration
+        ### Features:
+        - Automated data processing and chunking
+        - Advanced analytics and insights
+        - Downloadable analysis reports
+        """)
+        with gr.Row():
+            file_input = gr.File(label="Upload a file (.txt, .csv, or .json)")
+        with gr.Row():
+            analyze_btn = gr.Button("Analyze File", variant="primary")
+            download_btn = gr.Button("Download Report", variant="secondary")
+        output = gr.HTML(label="Analysis Results")
+        report_output = gr.File(label="Download Report")
+        analyze_btn.click(
+            fn=process_and_display,
+            inputs=[file_input],
+            outputs=[output]
+        )
+        download_btn.click(
+            fn=generate_and_download_report,
+            inputs=[file_input],
+            outputs=[report_output]
+        )
+    demo.launch(share=True)
+# Launch the Gradio app only when this file is executed as a script.
+if __name__ == "__main__":
+    main()

huggingface.yml ADDED Viewed

	@@ -0,0 +1,3 @@

+version: 0.1
+docker:
+  image: acecalisto3/Dbgod

nltk_setup.py ADDED Viewed

	@@ -0,0 +1,53 @@

+import os
+import nltk
+import logging
+from pathlib import Path
+# Configure logging at INFO level; each record is formatted as
+# "timestamp - logger name - level - message". This runs at import time.
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+# Module-level logger used by setup_nltk().
+logger = logging.getLogger(__name__)
+def setup_nltk():
+    """
+    Set up NLTK data in a local directory to avoid permission issues.
+
+    Downloads each required NLTK package that is not already present.
+
+    Returns:
+        bool: True when every required package was found or downloaded,
+        False when a download failed or an error occurred.
+    """
+    # Location of each required package inside an NLTK data directory.
+    # NOTE(review): recent NLTK releases load 'punkt_tab' for word_tokenize;
+    # confirm whether it should be added for the installed version.
+    resource_paths = {
+        'punkt': 'tokenizers/punkt',
+        'vader_lexicon': 'sentiment/vader_lexicon',
+        'stopwords': 'corpora/stopwords',
+    }
+    try:
+        # Create a local directory for NLTK data
+        nltk_data_dir = Path('./nltk_data')
+        nltk_data_dir.mkdir(exist_ok=True)
+        data_dir = str(nltk_data_dir)
+        # Add the local directory to NLTK's data path (once, even if this
+        # function is called repeatedly)
+        if data_dir not in nltk.data.path:
+            nltk.data.path.append(data_dir)
+        failed = []
+        for package, resource in resource_paths.items():
+            try:
+                # Try to load the package first
+                nltk.data.find(resource)
+                logger.info(f"Package '{package}' is already downloaded")
+            except LookupError:
+                # If package is not found, download it
+                logger.info(f"Downloading package '{package}'...")
+                # nltk.download() signals failure by returning False rather
+                # than raising, so the result has to be checked.
+                if nltk.download(package, download_dir=data_dir):
+                    logger.info(f"Successfully downloaded package '{package}'")
+                else:
+                    logger.error(f"Failed to download package '{package}'")
+                    failed.append(package)
+        if failed:
+            logger.error(f"NLTK setup incomplete; missing packages: {', '.join(failed)}")
+            return False
+        logger.info("NLTK setup completed successfully")
+        return True
+    except PermissionError as e:
+        logger.error(f"Permission error while setting up NLTK: {e}")
+        return False
+    except Exception as e:
+        logger.error(f"Unexpected error during NLTK setup: {e}")
+        return False
+# Run the setup when this file is executed as a script. The boolean returned
+# by setup_nltk() is not inspected here.
+if __name__ == "__main__":
+    setup_nltk()

requirements.txt ADDED Viewed

	@@ -0,0 +1,17 @@

+matplotlib
+gradio
+pandas
+uvicorn
+numpy
+nltk
+scikit-learn
+seaborn
+psycopg2-binary
+watchdog
+redis
+beautifulsoup4
+pymysql
+pysqlite3-binary
+statsmodels
+pymongo
+python-dotenv