Spaces:
Sleeping
Sleeping
| import sqlite3 | |
| import os | |
| from datetime import datetime | |
def initialize_database():
    """
    Create the SQLite database file and its 'documents' table if they are missing.

    Opens 'dataset.db' in the current working directory (SQLite creates the
    file when it does not exist) and issues ``CREATE TABLE IF NOT EXISTS`` for
    the 'documents' table with these columns:

    - `id`: An auto-incrementing integer primary key.
    - `text`: The main text content of the document (required, NOT NULL).
    - `topics`: A string representing associated topics (optional).
    - `date`: Insertion timestamp; defaults to SQLite's CURRENT_TIMESTAMP.

    An existing 'documents' table is left untouched (its schema is not
    migrated), so calling this function repeatedly is safe.

    Raises:
    -------
    sqlite3.Error
        If the database cannot be opened or the statement fails. The
        connection is closed before the error propagates.

    Example:
    --------
    >>> initialize_database()
    # 'dataset.db' now contains the 'documents' table.
    """
    conn = sqlite3.connect('dataset.db')
    try:
        # The connection context manager commits on success and rolls back
        # if the statement raises; it does NOT close the connection.
        with conn:
            conn.execute('''
                CREATE TABLE IF NOT EXISTS documents (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    text TEXT NOT NULL,
                    topics TEXT,
                    date TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                )
            ''')
    finally:
        # Always release the database handle, even when table creation fails.
        conn.close()
| from huggingface_hub import HfApi | |
def commit_to_huggingface():
    """Upload the local dataset.db file to the Hugging Face Space repository."""
    # The access token is taken from the `hf_key` environment variable.
    hub_client = HfApi(token=os.getenv("hf_key"))

    # Replace `repo_id` with your own Space's repository name.
    upload_args = {
        "path_or_fileobj": "dataset.db",
        "path_in_repo": "dataset.db",
        "repo_id": "Danielrahmai1991/dataset_interface",
        "repo_type": "space",
    }

    # Push the database file to the Space under the same file name.
    hub_client.upload_file(**upload_args)
def save_to_db(chunks, topics=None):
    """
    Save chunks of text to the SQLite database and push the file to the Hub.

    This function performs the following steps:
    1. Ensures the database and 'documents' table exist by calling
       `initialize_database`.
    2. Connects to 'dataset.db' and inserts one row per chunk in a single
       transaction:
       - The `text` column stores the chunk of text.
       - The `topics` column stores the same `topics` value for every chunk.
       - The `date` column is not supplied, so the table's default applies.
    3. Commits on success (or rolls back if any insert fails) and always
       closes the connection.
    4. Calls `commit_to_huggingface` once the rows are committed.

    Parameters:
    ----------
    chunks : iterable of str
        The text chunks to be saved, one row per element.
    topics : str or None, optional
        A string representing the topics associated with the chunks.
        Defaults to None, which is stored as NULL.

    Raises:
    -------
    sqlite3.Error
        If an insert fails. Nothing from this call is committed and
        `commit_to_huggingface` is not invoked.

    Example:
    --------
    >>> save_to_db(["This is the first chunk.", "This is the second chunk."], "Example Topics")
    # Saves two rows to the 'documents' table with the provided text and topics.
    """
    # Ensure the database and table are initialized
    initialize_database()

    conn = sqlite3.connect('dataset.db')
    try:
        # `with conn` wraps the batch in one transaction: commit on success,
        # rollback on error. It does not close the connection.
        with conn:
            conn.executemany(
                'INSERT INTO documents (text, topics) VALUES (?, ?)',
                ((chunk, topics) for chunk in chunks),
            )
    finally:
        # Release the database handle even if an insert raised.
        conn.close()

    # Only reached when the rows were committed successfully.
    commit_to_huggingface()