Spaces:
Sleeping
Sleeping
Update database.py
Browse files- database.py +47 -3
database.py
CHANGED
|
@@ -1,8 +1,27 @@
|
|
| 1 |
import sqlite3
|
| 2 |
import os
|
|
|
|
| 3 |
|
| 4 |
def initialize_database():
|
| 5 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
# Connect to the SQLite database (or create it if it doesn't exist)
|
| 7 |
conn = sqlite3.connect('dataset.db')
|
| 8 |
cursor = conn.cursor()
|
|
@@ -12,7 +31,8 @@ def initialize_database():
|
|
| 12 |
CREATE TABLE IF NOT EXISTS documents (
|
| 13 |
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
| 14 |
text TEXT NOT NULL,
|
| 15 |
-
topics TEXT
|
|
|
|
| 16 |
)
|
| 17 |
''')
|
| 18 |
|
|
@@ -41,7 +61,31 @@ def commit_to_huggingface():
|
|
| 41 |
|
| 42 |
|
| 43 |
def save_to_db(chunks, topics=None):
|
| 44 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
# Ensure the database and table are initialized
|
| 46 |
initialize_database()
|
| 47 |
|
|
|
|
| 1 |
import sqlite3
|
| 2 |
import os
|
| 3 |
+
from datetime import datetime
|
| 4 |
|
| 5 |
def initialize_database():
|
| 6 |
+
"""
|
| 7 |
+
Initialize the SQLite database and create the 'documents' table if it doesn't exist.
|
| 8 |
+
|
| 9 |
+
This function performs the following steps:
|
| 10 |
+
1. Connects to the SQLite database (or creates it if it doesn't exist).
|
| 11 |
+
2. Creates the 'documents' table with the following columns:
|
| 12 |
+
- `id`: An auto-incrementing primary key.
|
| 13 |
+
- `text`: The main text content of the document (required, non-nullable).
|
| 14 |
+
- `topics`: A string representing associated topics (optional).
|
| 15 |
+
- `date`: A timestamp indicating when the row was inserted (default: SQLite `CURRENT_TIMESTAMP`, which is recorded in UTC).
|
| 16 |
+
3. Commits the changes and closes the connection.
|
| 17 |
+
|
| 18 |
+
The `date` column is automatically populated with the current timestamp when a new row is inserted.
|
| 19 |
+
|
| 20 |
+
Example:
|
| 21 |
+
--------
|
| 22 |
+
>>> initialize_database()
|
| 23 |
+
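# Creates the 'dataset.db' file and the 'documents' table if they do not already exist; an existing 'documents' table is left unchanged (CREATE TABLE IF NOT EXISTS does not alter its schema).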
|
| 24 |
+
"""
|
| 25 |
# Connect to the SQLite database (or create it if it doesn't exist)
|
| 26 |
conn = sqlite3.connect('dataset.db')
|
| 27 |
cursor = conn.cursor()
|
|
|
|
| 31 |
CREATE TABLE IF NOT EXISTS documents (
|
| 32 |
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
| 33 |
text TEXT NOT NULL,
|
| 34 |
+
topics TEXT,
|
| 35 |
+
date TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
| 36 |
)
|
| 37 |
''')
|
| 38 |
|
|
|
|
| 61 |
|
| 62 |
|
| 63 |
def save_to_db(chunks, topics=None):
|
| 64 |
+
"""
|
| 65 |
+
Save chunks of text to the SQLite database.
|
| 66 |
+
|
| 67 |
+
This function performs the following steps:
|
| 68 |
+
1. Ensures the database and 'documents' table are initialized by calling `initialize_database`.
|
| 69 |
+
2. Connects to the SQLite database.
|
| 70 |
+
3. Inserts each chunk of text into the 'documents' table along with associated topics.
|
| 71 |
+
- The `text` column stores the chunk of text.
|
| 72 |
+
- The `topics` column stores the associated topics (optional).
|
| 73 |
+
- The `date` column is automatically populated with the current timestamp when the row is inserted.
|
| 74 |
+
4. Commits the changes and closes the connection.
|
| 75 |
+
5. Calls `commit_to_huggingface` to synchronize the database with an external repository (if applicable).
|
| 76 |
+
|
| 77 |
+
Parameters:
|
| 78 |
+
----------
|
| 79 |
+
chunks : list of str
|
| 80 |
+
A list of text chunks to be saved to the database.
|
| 81 |
+
topics : str or None, optional
|
| 82 |
+
A string representing the topics associated with the chunks. Defaults to None.
|
| 83 |
+
|
| 84 |
+
Example:
|
| 85 |
+
--------
|
| 86 |
+
>>> save_to_db(["This is the first chunk.", "This is the second chunk."], "Example Topics")
|
| 87 |
+
# Saves two rows to the 'documents' table with the provided text and topics.
|
| 88 |
+
"""
|
| 89 |
# Ensure the database and table are initialized
|
| 90 |
initialize_database()
|
| 91 |
|