SVashishta1 committed
Commit 6e54ca7 · Parent(s): f35c7b5
Error Fix
app.py CHANGED
@@ -6,10 +6,9 @@ import tempfile
 import pandas as pd
 import sqlite3
 from langchain_core.prompts import ChatPromptTemplate
 import plotly.express as px
-import plotly.io as pio
 import time
-from functools import lru_cache

 # Load environment variables
 load_dotenv()
@@ -18,164 +17,210 @@ load_dotenv()
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

 from backend.main import DocumentAssistant
-from backend.db import SimpleDB
-from backend.vector_db import ChromaVectorDB
-from backend.query_engine import QueryEngine
-from backend.document_parser import SimpleDocumentParser
-
-# Initialize components
-db = SimpleDB()
-vector_db = ChromaVectorDB(os.getenv("CHROMA_DB_PATH", "./data/chroma_db"))
-query_engine = QueryEngine()

-# Initialize the document
-document_parser = SimpleDocumentParser()
-
-# Initialize DocumentAssistant
 document_assistant = DocumentAssistant()

 # Database path for CSV data
 DB_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data", "csv_data.db")
 os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)

-#
-        For counts, use COUNT().
-        For sums, use SUM().
-        - Include relevant aggregations (AVG, COUNT, SUM)
-    2. For distributions:
-        - Group by the value being distributed
-        - Include COUNT or frequency
-    3. For comparisons:
-        - Include multiple measures
-        - Order appropriately

 # Define the prompt for interpreting the SQL query result
 interpret_prompt = ChatPromptTemplate.from_messages(
     [
-        ("system", "You are an experienced data analyst.
-        ("human", "Question: {question}\
     ]
 )

-# Add this as a global variable to track current context
-current_context = {
-    "file_type": None,  # 'csv' or 'pdf' or None
-    "file_name": None,
-    "table_name": None
-}
-
-# Add a simple cache for database schema information
-@lru_cache(maxsize=32)
-def get_table_info(table_name):
-    """Get cached table information"""
-    conn = sqlite3.connect(DB_PATH)
-    cursor = conn.cursor()
-
-    # Get column info
-    cursor.execute(f"PRAGMA table_info({table_name});")
-    columns = [f"{col[1]} ({col[2]})" for col in cursor.fetchall()]
-
-    # Get row count - use approximate count for large tables
-    cursor.execute(f"SELECT COUNT(*) FROM {table_name} LIMIT 1;")
-    row_count = cursor.fetchone()[0]
-
-    conn.close()
-    return columns, row_count
-
-# Optimize the process_text_query function
 def process_text_query(query, history):
     """Process a text query and update chat history"""
     if not query:
         return "", history

-    # Add a loading message to the history
     history.append({"role": "user", "content": query})
-    history.append({"role": "assistant", "content": "Processing your query..."})

     if current_context["file_type"] == "csv" and current_context["table_name"]:
-        table_name = current_context["table_name"]
         try:
-            #
-            SELECT * FROM {table_name} LIMIT 10;
-            """
-            #
-                    sql_query = f"SELECT MAX({col}) as max_value FROM {table_name};"
-                    break
-            for col in ["tip_amount", "fare_amount", "total_amount"]:
-                if col in query.lower():
-                    sql_query = f"SELECT AVG({col}) as avg_value FROM {table_name};"
-                    break
-            #
-            conn.execute("PRAGMA journal_mode = OFF;")  # Disable journaling
-            conn.execute("PRAGMA synchronous = OFF;")  # Disable synchronous writes

-            result_df = pd.read_sql_query(sql_query, conn, chunksize=1000)
-            #
-            result_df = next(result_df)

-            #
-                if "avg" in col.lower():
-                    response += f"The average value is {result_df[col].iloc[0]:.2f}."
-                    break
                 else:
-                    response +=
-            response

             conn.close()
@@ -193,8 +238,8 @@ def process_text_query(query, history):
     processing_time = time.time() - start_time
     response += f"\n\n(Query processed in {processing_time:.2f} seconds)"

-    #
-    history

     return "", history
@@ -223,40 +268,43 @@ def process_file_upload(files):
                 # Create table name from filename
                 table_name = os.path.splitext(file_name)[0].replace(' ', '_').lower()

-                #
                 conn = sqlite3.connect(DB_PATH)

-                #
                 # Update current context
                 current_context = {
                     "file_type": "csv",
                     "file_name": file_name,
-                    "table_name":
                 }

-                # Get
                 cursor = conn.cursor()
-                cursor.execute(

                 conn.close()

                 file_info.append("✅ CSV File Successfully Loaded")
-                file_info.append(f"π Table Name:
                 file_info.append(f"π Total Rows: {row_count:,}")
-                file_info.append(f"π Columns: {

             except Exception as e:
                 file_info.append(f"❌ Error loading CSV {file_name}: {str(e)}")
-                # Print the full error for debugging
-                import traceback
-                print(traceback.format_exc())

         else:
             # Process PDF or other document types
@@ -279,83 +327,6 @@ def process_file_upload(files):

     return "\n".join(file_info)

-def process_voice_input(audio_path):
-    """Process voice input and return transcribed text"""
-    if audio_path is None:
-        return "No audio recorded"
-
-    # Since we don't have VoiceAssistant, return a placeholder message
-    return "Voice transcription is not available"
-
-def text_to_speech_output(text):
-    """Convert text to speech"""
-    if not text or len(text) == 0:
-        return None
-
-    # Extract the last assistant message
-    last_message = None
-    for msg in reversed(text):
-        if msg["role"] == "assistant":
-            last_message = msg["content"]
-            break
-
-    if not last_message:
-        return None
-
-    # Since we don't have VoiceAssistant, return None
-    return None
-
-# Optimize the load_csv_to_sqlite function
-def load_csv_to_sqlite(file_path, conn, table_name):
-    """Load CSV data into SQLite database with optimizations"""
-    # Use larger chunk size for faster loading
-    chunksize = 10000
-
-    # Configure SQLite for faster imports
-    conn.execute("PRAGMA synchronous = OFF")
-    conn.execute("PRAGMA journal_mode = MEMORY")
-    conn.execute("PRAGMA temp_store = MEMORY")
-    conn.execute("PRAGMA cache_size = 10000")
-
-    try:
-        # Start transaction manually
-        conn.execute("BEGIN TRANSACTION")
-
-        # Read the CSV in chunks
-        for i, chunk in enumerate(pd.read_csv(file_path, chunksize=chunksize)):
-            # Optimize column types
-            for col in chunk.columns:
-                # Convert date columns to datetime
-                if 'date' in col.lower() or 'time' in col.lower():
-                    try:
-                        chunk[col] = pd.to_datetime(chunk[col], errors='coerce')
-                    except:
-                        pass
-
-            # Load the chunk into the SQLite database
-            if_exists = 'replace' if i == 0 else 'append'
-            chunk.to_sql(table_name, conn, if_exists=if_exists, index=False, method='multi')
-
-        # Create indices for common query columns
-        for col in ['pickup_datetime', 'dropoff_datetime', 'tip_amount', 'fare_amount', 'total_amount']:
-            try:
-                if col in chunk.columns:  # Only create index if column exists
-                    conn.execute(f"CREATE INDEX IF NOT EXISTS idx_{table_name}_{col} ON {table_name}({col})")
-            except Exception as idx_error:
-                print(f"Warning: Could not create index on {col}: {str(idx_error)}")
-
-        # Commit the transaction
-        conn.commit()
-        print(f"Successfully loaded {table_name} into database")
-
-    except Exception as e:
-        # Only try to rollback if we're in a transaction
-        try:
-            conn.rollback()
-        except:
-            pass  # If rollback fails, just continue
-        raise e
-
 def list_documents():
     """List all indexed documents"""
     info_list = []
@@ -378,22 +349,7 @@ def list_documents():
             cursor.execute(f"SELECT COUNT(*) FROM {table[0]};")
             row_count = cursor.fetchone()[0]

-            sample_info = []
-            for col in ['vendor_id', 'rate_code', 'payment_type']:
-                if col in columns:
-                    cursor.execute(f"SELECT DISTINCT {col} FROM {table[0]} LIMIT 5;")
-                    unique_vals = [str(row[0]) for row in cursor.fetchall()]
-                    if unique_vals:
-                        sample_info.append(f"{col}: {', '.join(unique_vals)}")
-
-            info_list.append(f"\n🔹 Table: {table[0]}")
-            info_list.append(f"  - Rows: {row_count:,}")
-            info_list.append(f"  - Columns: {len(columns)}")
-            if sample_info:
-                info_list.append("  - Sample values:")
-                for info in sample_info:
-                    info_list.append(f"    • {info}")

         conn.close()
     except Exception as e:
@@ -421,6 +377,32 @@ def clear_context():
     }
     return None

 # Create Gradio interface
 with gr.Blocks(title="AI Document Analysis & Voice Assistant") as demo:
     gr.Markdown("# π€ AI Document Analysis & Voice Assistant")
@@ -466,6 +448,7 @@ with gr.Blocks(title="AI Document Analysis & Voice Assistant") as demo:
     )

     clear_btn.click(lambda: None, None, chatbot, queue=False)

     voice_btn.click(
         lambda: gr.update(visible=True),
@@ -486,13 +469,6 @@ with gr.Blocks(title="AI Document Analysis & Voice Assistant") as demo:
         inputs=[chatbot],
         outputs=[audio_output]
     )
-
-    # Add event handler for clear context button
-    clear_context_btn.click(
-        clear_context,
-        inputs=[],
-        outputs=[chatbot]
-    )

     with gr.Tab("Document Upload"):
         file_upload = gr.File(
@@ -6,10 +6,9 @@ import tempfile
 import pandas as pd
 import sqlite3
 from langchain_core.prompts import ChatPromptTemplate
+from langchain_groq import ChatGroq
 import plotly.express as px
 import time

 # Load environment variables
 load_dotenv()
@@ -18,164 +17,210 @@ load_dotenv()
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

 from backend.main import DocumentAssistant

+# Initialize the document assistant
 document_assistant = DocumentAssistant()

+# Initialize the LLM using the llama3-8b-8192 model from Groq
+llm = ChatGroq(
+    model="llama3-8b-8192",
+    temperature=0,
+    max_tokens=None,
+    timeout=None,
+    max_retries=2,
+    verbose=True,
+    api_key=os.getenv("GROQ_API_KEY")
+)
+
 # Database path for CSV data
 DB_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data", "csv_data.db")
 os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)

+# Current context to track what we're working with
+current_context = {
+    "file_type": None,
+    "file_name": None,
+    "table_name": None
+}
+
+# Define the prompt with examples for SQL query generation
+query_prompt = ChatPromptTemplate.from_messages(
+    [
+        ("system", """
+        You are an SQL and data analysis expert. Generate an appropriate SQL query using SQLite syntax for the question provided, without any explanations or code comments.
+        Follow SQLite-specific conventions, as shown in the examples below:
+
+        Example 1:
+        Question: "What is the average fare for trips over 10 miles?"
+        SQL Query: SELECT AVG(fare_amount) FROM data_tab WHERE trip_distance > 10;
+
+        Example 2:
+        Question: "How many trips were taken in each month?"
+        SQL Query: SELECT strftime('%m', pickup_datetime) AS month, COUNT(*) AS trip_count FROM data_tab GROUP BY month;
+
+        Example 3:
+        Question: "What is the total fare amount for each driver (medallion) per day?"
+        SQL Query: SELECT DATE(pickup_datetime) AS date, medallion, SUM(fare_amount) AS total_fare FROM data_tab GROUP BY date, medallion;
+
+        Example 4:
+        Question: "What is the highest tip amount in the dataset?"
+        SQL Query: SELECT MAX(tip_amount) as highest_tip FROM data_tab;
+
+        Example 5:
+        Question: "Plot a bar graph for tip trends by month"
+        SQL Query: SELECT strftime('%Y-%m', pickup_datetime) as month, AVG(tip_amount) as avg_tip, COUNT(*) as count FROM data_tab GROUP BY month ORDER BY month;
+
+        SQLite-Specific Conventions:
+
+        1. Date and Time Extraction:
+           - Instead of `EXTRACT(YEAR FROM column)`, use `strftime('%Y', column)` to extract the year.
+           - Example: `SELECT strftime('%Y', pickup_datetime) FROM data_tab;`

+        2. String Length:
+           - Instead of `CHAR_LENGTH(column)`, use `LENGTH(column)`.
+           - Example: `SELECT LENGTH(passenger_name) FROM data_tab;`

+        3. Regular Expressions:
+           - SQLite does not support `REGEXP`. Use `LIKE` for simple patterns or avoid regular expressions.
+           - Example: `SELECT * FROM data_tab WHERE passenger_name LIKE 'A%';`

+        4. Window Functions:
+           - For row numbering, use `ROW_NUMBER()` if supported, or simulate with joins.
+           - Example: `SELECT id, ROW_NUMBER() OVER (ORDER BY pickup_datetime) AS row_num FROM data_tab;`

+        5. Data Type Casting:
+           - Use `CAST(column AS TYPE)`, but note that SQLite supports limited types.
+           - Example: `SELECT CAST(fare_amount AS INTEGER) FROM data_tab;`

+        6. Full Outer Join Workaround:
+           - SQLite doesn't support `FULL OUTER JOIN`. Combine `LEFT JOIN` and `UNION` for a similar effect.
+           - Example:
+             ```
+             SELECT a.*, b.*
+             FROM table_a a
+             LEFT JOIN table_b b ON a.id = b.id
+             UNION
+             SELECT a.*, b.*
+             FROM table_a a
+             RIGHT JOIN table_b b ON a.id = b.id;
+             ```

+        Use these examples and guidelines to generate an SQL query compatible with SQLite syntax for the question provided.
+        Always use 'data_tab' as the table name.
+        """),
+        ("human", "{question}"),
+    ]
+)

 # Define the prompt for interpreting the SQL query result
 interpret_prompt = ChatPromptTemplate.from_messages(
     [
+        ("system", "You are an experienced data analyst. Provide a concise, natural language answer based on the given data summary. If relevant, give key statistics, trends, or patterns."),
+        ("human", "Question: {question}\nSQL Query: {sql_query}\nData Summary:\n{data_summary}")
     ]
 )

 def process_text_query(query, history):
     """Process a text query and update chat history"""
     if not query:
         return "", history

+    # Add the user's query to history
     history.append({"role": "user", "content": query})

+    start_time = time.time()
+
+    # Check if we're in CSV context
     if current_context["file_type"] == "csv" and current_context["table_name"]:
         try:
+            # Connect to the database
+            conn = sqlite3.connect(DB_PATH)

+            # Get column information for context
+            cursor = conn.cursor()
+            cursor.execute(f"PRAGMA table_info({current_context['table_name']});")
+            columns = [info[1] for info in cursor.fetchall()]
+            columns_str = ", ".join(columns)

+            # Create question with context
+            question_with_context = f"The table 'data_tab' has columns: {columns_str}. {query}"

+            # Generate SQL query using LLM
+            ai_msg = query_prompt | llm
+            sql_query = ai_msg.invoke({"question": question_with_context}).content.strip()

+            print(f"Generated SQL Query: {sql_query}")

+            # Check if this is a visualization request
+            is_visualization = any(word in query.lower() for word in ['plot', 'graph', 'chart', 'visualize', 'visualization', 'trend'])

+            try:
+                # Execute the query
+                result_df = pd.read_sql_query(sql_query, conn)
+
+                # Generate data summary
+                if not result_df.empty:
+                    data_summary = result_df.describe(include='all').to_string()
+
+                    # For small result sets, include the actual data
+                    if len(result_df) <= 10:
+                        data_summary += f"\n\nFull Results:\n{result_df.to_string()}"
+                    else:
+                        data_summary += f"\n\nFirst 5 rows:\n{result_df.head(5).to_string()}"
+                else:
+                    data_summary = "No relevant data found."
+
+                # Generate interpretation
+                answer_chain = interpret_prompt | llm
+                interpretation = answer_chain.invoke({
+                    "question": query,
+                    "sql_query": sql_query,
+                    "data_summary": data_summary
+                }).content.strip()

+                # Create the response
+                response = f"**SQL Query:**\n```sql\n{sql_query}\n```\n\n"
+
+                if not result_df.empty:
+                    if len(result_df) > 10:
+                        response += f"**Results (first 5 of {len(result_df)} rows):**\n```\n{result_df.head(5).to_string()}\n```\n\n"
+                    else:
+                        response += f"**Results:**\n```\n{result_df.to_string()}\n```\n\n"
                 else:
+                    response += "**No results found.**\n\n"
+
+                response += f"**Analysis:**\n{interpretation}"
+
+                # Add visualization if requested
+                if is_visualization and not result_df.empty:
+                    try:
+                        # Determine the type of visualization based on the data
+                        if len(result_df.columns) >= 2:
+                            # Find numeric columns for y-axis
+                            numeric_cols = result_df.select_dtypes(include=['number']).columns.tolist()
+
+                            if len(numeric_cols) >= 1 and len(result_df) > 1:
+                                # Use the first column as x and first numeric column as y
+                                x_col = result_df.columns[0]
+                                y_col = numeric_cols[0]
+
+                                # Create appropriate plot based on data characteristics
+                                if 'month' in result_df.columns or 'date' in result_df.columns or 'year' in result_df.columns:
+                                    # Time series data - use line chart
+                                    fig = px.line(result_df, x=x_col, y=numeric_cols, title="Time Series Analysis")
+                                else:
+                                    # Regular data - use bar chart
+                                    fig = px.bar(result_df, x=x_col, y=y_col, title="Data Visualization")
+
+                                # Convert to HTML and add to response
+                                plot_html = fig.to_html(full_html=False, include_plotlyjs='cdn')
+                                response += f"\n\n**Visualization:**\n<div>{plot_html}</div>"
+                    except Exception as viz_error:
+                        print(f"Visualization error: {str(viz_error)}")
+                        # Continue without visualization if there's an error
+
+            except Exception as e:
+                response = f"**SQL Query:**\n```sql\n{sql_query}\n```\n\n**Error executing query:** {str(e)}"

             conn.close()
@@ -193,8 +238,8 @@ def process_text_query(query, history):
     processing_time = time.time() - start_time
     response += f"\n\n(Query processed in {processing_time:.2f} seconds)"

+    # Add the response to history
+    history.append({"role": "assistant", "content": response})

     return "", history
@@ -223,40 +268,43 @@ def process_file_upload(files):
                 # Create table name from filename
                 table_name = os.path.splitext(file_name)[0].replace(' ', '_').lower()

+                # Load CSV into SQLite
                 conn = sqlite3.connect(DB_PATH)

+                # Configure SQLite for faster imports
+                conn.execute("PRAGMA synchronous = OFF")
+                conn.execute("PRAGMA journal_mode = MEMORY")
+
+                # Read the CSV and load it into SQLite
+                df = pd.read_csv(file_path)
+                df.to_sql('data_tab', conn, if_exists='replace', index=False)

                 # Update current context
                 current_context = {
                     "file_type": "csv",
                     "file_name": file_name,
+                    "table_name": "data_tab"  # Always use data_tab as the table name
                 }

+                # Get column info
                 cursor = conn.cursor()
+                cursor.execute("PRAGMA table_info(data_tab);")
+                columns = [f"{col[1]} ({col[2]})" for col in cursor.fetchall()]

+                # Get row count
+                cursor.execute("SELECT COUNT(*) FROM data_tab;")
+                row_count = cursor.fetchone()[0]

                 conn.close()

                 file_info.append("✅ CSV File Successfully Loaded")
+                file_info.append(f"π Table Name: data_tab")
+                file_info.append(f"π Source File: {file_name}")
                 file_info.append(f"π Total Rows: {row_count:,}")
+                file_info.append(f"π Columns: {', '.join(columns)}")

             except Exception as e:
                 file_info.append(f"❌ Error loading CSV {file_name}: {str(e)}")

         else:
             # Process PDF or other document types
@@ -279,83 +327,6 @@ def process_file_upload(files):

     return "\n".join(file_info)

 def list_documents():
     """List all indexed documents"""
     info_list = []
@@ -378,22 +349,7 @@ def list_documents():
             cursor.execute(f"SELECT COUNT(*) FROM {table[0]};")
             row_count = cursor.fetchone()[0]

+            info_list.append(f"- {table[0]} ({row_count:,} rows, {len(columns)} columns)")

         conn.close()
     except Exception as e:
@@ -421,6 +377,32 @@ def clear_context():
     }
     return None

+def process_voice_input(audio_path):
+    """Process voice input and return transcribed text"""
+    if audio_path is None:
+        return "No audio recorded"
+
+    # Since we don't have VoiceAssistant, return a placeholder message
+    return "Voice transcription is not available"
+
+def text_to_speech_output(text):
+    """Convert text to speech"""
+    if not text or len(text) == 0:
+        return None
+
+    # Extract the last assistant message
+    last_message = None
+    for msg in reversed(text):
+        if msg["role"] == "assistant":
+            last_message = msg["content"]
+            break
+
+    if not last_message:
+        return None
+
+    # Since we don't have VoiceAssistant, return None
+    return None
+
 # Create Gradio interface
 with gr.Blocks(title="AI Document Analysis & Voice Assistant") as demo:
     gr.Markdown("# π€ AI Document Analysis & Voice Assistant")
@@ -466,6 +448,7 @@ with gr.Blocks(title="AI Document Analysis & Voice Assistant") as demo:
     )

     clear_btn.click(lambda: None, None, chatbot, queue=False)
+    clear_context_btn.click(clear_context, inputs=[], outputs=[chatbot])

     voice_btn.click(
         lambda: gr.update(visible=True),
@@ -486,13 +469,6 @@ with gr.Blocks(title="AI Document Analysis & Voice Assistant") as demo:
         inputs=[chatbot],
         outputs=[audio_output]
     )

     with gr.Tab("Document Upload"):
         file_upload = gr.File(
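For orientation, here is a minimal standalone sketch of the flow this commit wires into `process_text_query` (prompt → ChatGroq → SQLite → interpretation). It assumes a CSV has already been loaded into the `data_tab` table of `./data/csv_data.db` and that `GROQ_API_KEY` is set; the sample question and the trimmed-down system prompts are illustrative placeholders, not part of app.py.

```python
import os
import sqlite3

import pandas as pd
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq

# Minimal sketch of the new text-to-SQL path: ask the LLM for a SQLite query,
# run it against the uploaded CSV (stored as 'data_tab'), then summarize the result.
llm = ChatGroq(model="llama3-8b-8192", temperature=0, api_key=os.getenv("GROQ_API_KEY"))

sql_prompt = ChatPromptTemplate.from_messages([
    ("system", "Return a single SQLite query for the table 'data_tab'. SQL only, no prose."),
    ("human", "{question}"),
])
interpret = ChatPromptTemplate.from_messages([
    ("system", "You are a data analyst. Answer the question from the data summary."),
    ("human", "Question: {question}\nSQL: {sql_query}\nData:\n{data_summary}"),
])

question = "What is the highest tip amount in the dataset?"  # placeholder question
sql_query = (sql_prompt | llm).invoke({"question": question}).content.strip()

conn = sqlite3.connect("./data/csv_data.db")
result_df = pd.read_sql_query(sql_query, conn)  # raises if the generated SQL is invalid
conn.close()

answer = (interpret | llm).invoke({
    "question": question,
    "sql_query": sql_query,
    "data_summary": result_df.to_string(),
}).content.strip()
print(sql_query, answer, sep="\n\n")
```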