File size: 3,797 Bytes
05e4e1f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import contextlib
import os
import shutil
import sqlite3
import tempfile
import zipfile

import chromadb
import gradio as gr
from chromadb.utils import embedding_functions

# Output locations, relative to the process working directory.
_VECTOR_DB_PATH = "./legal_vector_db"
_ZIP_PATH = "legal_vector_db.zip"
_COLLECTION_NAME = "legal_cases_collection"
_EMBEDDING_MODEL = "paraphrase-multilingual-MiniLM-L12-v2"
_BATCH_SIZE = 100


def _load_case_rows(db_path):
    """Return all rows of the ``cases`` table from the SQLite file at *db_path*."""
    # sqlite3's own context manager only commits/rolls back the transaction;
    # closing() is what actually releases the connection and its file handle.
    with contextlib.closing(sqlite3.connect(db_path)) as conn:
        cursor = conn.cursor()
        cursor.execute("""
            SELECT लिङ्क, निर्णय_नं, साल, मुद्दाको_किसिम, विषय, निवेदक, विपक्षी, प्रकरण, ठहर
            FROM cases
        """)
        return cursor.fetchall()


def _build_case_records(rows):
    """Convert case rows into parallel ``(documents, metadatas, ids)`` lists."""
    documents = []
    metadatas = []
    ids = []

    for index, row in enumerate(rows):
        # SQL NULL arrives as None. Map it to "" so the document text does not
        # contain the literal word "None" and every metadata value stays a
        # plain scalar (Chroma documents str/int/float/bool metadata values).
        (link, decision_no, year, mudda_type, subject,
         nibedak, vipakshi, prakaran, thahar) = (
            "" if value is None else value for value in row
        )

        # Combine text in Nepali format. The indentation inside this literal
        # is part of the stored document text (only the ends are stripped).
        case_text = f"""
        मुद्दाको किसिम: {mudda_type}
        विषय: {subject}
        निवेदक: {nibedak}
        विपक्षी: {vipakshi}
        प्रकरण: {prakaran}
        ठहर: {thahar}
        """

        documents.append(case_text.strip())
        metadatas.append({
            "link": link,
            "decision_no": decision_no,
            "year": year,
            "mudda_type": mudda_type,
            "subject": subject,
            "nibedak": nibedak,
            "vipakshi": vipakshi,
            "prakaran": prakaran,
            "thahar": thahar,
        })
        ids.append(f"case_{index}")

    return documents, metadatas, ids


def _zip_directory(src_dir, zip_path):
    """Write every file under *src_dir* into a new ZIP archive at *zip_path*.

    Archive member names are relative to the parent of *src_dir*, so the
    directory's own name is the top-level folder inside the archive.
    """
    parent_dir = os.path.join(src_dir, '..')
    # Mode 'w' truncates an existing archive, so no explicit removal is needed.
    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, _dirs, files in os.walk(src_dir):
            for file_name in files:
                file_path = os.path.join(root, file_name)
                zipf.write(file_path, os.path.relpath(file_path, parent_dir))


def convert_to_vector_db(sqlite_file):
    """Build a zipped ChromaDB vector store from an uploaded SQLite database.

    The upload must contain a ``cases`` table with the Nepali-named columns
    selected in ``_load_case_rows``. Each row becomes one document (a labelled
    text block of the case fields) plus a metadata dict, stored under the id
    ``case_<row index>`` in the ``legal_cases_collection`` collection.

    Args:
        sqlite_file: Path to the SQLite file, an object exposing that path as
            ``.name``, or ``None`` when nothing was uploaded.

    Returns:
        The path of the ZIP archive containing the persisted vector DB
        directory, or ``None`` if *sqlite_file* is ``None``.

    Raises:
        sqlite3.Error: If the file is not a readable SQLite database or lacks
            the expected table/columns. This is raised before any existing
            vector DB directory is deleted.

    Side effects:
        Replaces ``./legal_vector_db`` and ``legal_vector_db.zip`` in the
        current working directory.
    """
    if sqlite_file is None:
        return None  # No file uploaded, return nothing

    # Accept both a plain path and a file-like wrapper that carries its path
    # in ``.name`` (pathlib.Path is checked first: its ``.name`` is a basename).
    if isinstance(sqlite_file, (str, os.PathLike)):
        source_path = sqlite_file
    else:
        source_path = sqlite_file.name

    # Work on a private copy in a unique scratch directory so overlapping
    # calls cannot clobber each other and cleanup happens even on failure.
    with tempfile.TemporaryDirectory() as scratch_dir:
        db_path = os.path.join(scratch_dir, "temp.db")
        shutil.copy(source_path, db_path)
        rows = _load_case_rows(db_path)

    documents, metadatas, ids = _build_case_records(rows)

    # Only now, with the input successfully read, discard any previous output.
    if os.path.exists(_VECTOR_DB_PATH):
        shutil.rmtree(_VECTOR_DB_PATH)

    # Initialize ChromaDB persistent client
    chroma_client = chromadb.PersistentClient(path=_VECTOR_DB_PATH)

    # Use multilingual embedding model suitable for Nepali
    sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(
        model_name=_EMBEDDING_MODEL
    )

    collection = chroma_client.create_collection(
        name=_COLLECTION_NAME,
        embedding_function=sentence_transformer_ef
    )

    # Add to collection in batches
    for start in range(0, len(documents), _BATCH_SIZE):
        stop = start + _BATCH_SIZE
        collection.add(
            documents=documents[start:stop],
            metadatas=metadatas[start:stop],
            ids=ids[start:stop]
        )

    _zip_directory(_VECTOR_DB_PATH, _ZIP_PATH)

    return _ZIP_PATH

# Gradio UI: an upload widget, a trigger button and a download widget,
# wired so that a click runs the SQLite -> vector DB conversion.
with gr.Blocks() as demo:
    gr.Markdown(
        value="SQLite DB to Vector DB Converter (Nepali Legal Cases Supported)"
    )

    sqlite_upload = gr.File(
        label="Upload SQLite DB File (e.g., after_2061.db)",
    )
    convert_btn = gr.Button(value="Convert to Vector DB")
    download_file = gr.File(
        label="Download Vector DB (ZIP File)",
    )

    # The uploaded file is passed to the converter; whatever it returns
    # (the ZIP path, or None when nothing was uploaded) goes to the
    # download widget.
    convert_btn.click(
        fn=convert_to_vector_db,
        inputs=sqlite_upload,
        outputs=download_file,
    )

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()