|
|
import contextlib
import os
import shutil
import sqlite3
import tempfile
import zipfile

import chromadb
import gradio as gr
from chromadb.utils import embedding_functions
|
|
|
|
|
def _resolve_upload_path(sqlite_file):
    """Return the filesystem path of an uploaded file.

    Accepts either a path (``str`` / ``os.PathLike``) or a temp-file style
    wrapper object that exposes its path as ``.name`` (some Gradio versions
    pass the latter to event handlers).
    """
    if isinstance(sqlite_file, (str, os.PathLike)):
        return os.fspath(sqlite_file)
    return sqlite_file.name


def _fetch_case_rows(db_path):
    """Read every row of the nine case columns from the ``cases`` table."""
    query = """
        SELECT लिङ्क, निर्णय_नं, साल, मुद्दाको_किसिम, विषय, निवेदक, विपक्षी, प्रकरण, ठहर
        FROM cases
    """
    # ``with sqlite3.connect(...)`` only scopes the transaction; closing()
    # is what actually releases the file handle so the copy can be deleted.
    with contextlib.closing(sqlite3.connect(db_path)) as conn:
        return conn.execute(query).fetchall()


def _build_case_record(row):
    """Turn one ``cases`` row into a ``(document_text, metadata)`` pair."""
    # SQL NULL arrives as None. Normalise it to "" so the embedded text does
    # not contain the literal word "None" and every metadata value is a plain
    # scalar (Chroma versions that validate metadata strictly reject None).
    (link, decision_no, year, mudda_type, subject,
     nibedak, vipakshi, prakaran, thahar) = (
        "" if value is None else value for value in row
    )

    document = "\n".join([
        f"मुद्दाको किसिम: {mudda_type}",
        f"विषय: {subject}",
        f"निवेदक: {nibedak}",
        f"विपक्षी: {vipakshi}",
        f"प्रकरण: {prakaran}",
        f"ठहर: {thahar}",
    ]).strip()

    metadata = {
        "link": link,
        "decision_no": decision_no,
        "year": year,
        "mudda_type": mudda_type,
        "subject": subject,
        "nibedak": nibedak,
        "vipakshi": vipakshi,
        "prakaran": prakaran,
        "thahar": thahar,
    }
    return document, metadata


def _zip_directory(source_dir, zip_path):
    """Write every file under *source_dir* into a deflated ZIP at *zip_path*.

    Archive names are relative to the parent of *source_dir*, so the archive
    unpacks into a single top-level folder named after *source_dir*.
    """
    archive_root = os.path.join(source_dir, os.pardir)
    # Mode "w" truncates an existing archive, so no prior delete is needed.
    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as archive:
        for folder, _subdirs, filenames in os.walk(source_dir):
            for filename in filenames:
                file_path = os.path.join(folder, filename)
                archive.write(file_path, os.path.relpath(file_path, archive_root))


def convert_to_vector_db(sqlite_file):
    """Convert an uploaded SQLite case database into a zipped Chroma vector DB.

    The ``cases`` table is read from a private copy of the upload, each row is
    rendered to a text document plus metadata, the documents are added to a
    fresh ``legal_cases_collection`` persisted under ``./legal_vector_db``,
    and that directory is zipped.

    Args:
        sqlite_file: The uploaded file as delivered by the file input: a path,
            an object exposing the path as ``.name``, or ``None`` when nothing
            was uploaded.

    Returns:
        ``"legal_vector_db.zip"`` (relative to the working directory), or
        ``None`` when *sqlite_file* is ``None``.

    Raises:
        sqlite3.Error: If the upload is not a readable SQLite database or has
            no ``cases`` table with the expected columns.
        OSError: If copying, deleting, or zipping files fails.
    """
    if sqlite_file is None:
        return None

    vector_db_path = "./legal_vector_db"
    zip_path = "legal_vector_db.zip"
    batch_size = 100

    # Work on a copy inside a throwaway directory: it cannot collide with a
    # concurrent request and is removed even when a later step raises.
    with tempfile.TemporaryDirectory() as work_dir:
        db_path = os.path.join(work_dir, "temp.db")
        shutil.copy(_resolve_upload_path(sqlite_file), db_path)
        rows = _fetch_case_rows(db_path)

    # Only discard the previous vector DB once the upload has been read
    # successfully, so a bad upload does not destroy the last good result.
    if os.path.exists(vector_db_path):
        shutil.rmtree(vector_db_path)

    # NOTE(review): the output directory and ZIP name are fixed, so two
    # conversions running at once would still overwrite each other here.
    # NOTE(review): confirm that re-creating a PersistentClient on a path
    # removed earlier in the same process behaves as a fresh store.
    chroma_client = chromadb.PersistentClient(path=vector_db_path)
    embedder = embedding_functions.SentenceTransformerEmbeddingFunction(
        model_name="paraphrase-multilingual-MiniLM-L12-v2"
    )
    collection = chroma_client.create_collection(
        name="legal_cases_collection",
        embedding_function=embedder,
    )

    records = [_build_case_record(row) for row in rows]

    # Add in fixed-size batches; ids follow the row's position in the result
    # set ("case_0", "case_1", ...).
    for start in range(0, len(records), batch_size):
        batch = records[start:start + batch_size]
        collection.add(
            documents=[document for document, _ in batch],
            metadatas=[metadata for _, metadata in batch],
            ids=[f"case_{start + offset}" for offset in range(len(batch))],
        )

    _zip_directory(vector_db_path, zip_path)
    return zip_path
|
|
|
|
|
|
|
|
# Gradio front end: upload a SQLite file, run the conversion, download the ZIP.
with gr.Blocks() as demo:
    gr.Markdown("SQLite DB to Vector DB Converter (Nepali Legal Cases Supported)")

    # Input file, trigger button, and output file, in display order.
    sqlite_upload = gr.File(label="Upload SQLite DB File (e.g., after_2061.db)")
    convert_btn = gr.Button("Convert to Vector DB")
    download_file = gr.File(label="Download Vector DB (ZIP File)")

    # Clicking the button passes the upload to the converter and shows the
    # value it returns in the download component.
    convert_btn.click(
        fn=convert_to_vector_db,
        inputs=sqlite_upload,
        outputs=download_file,
    )


# Start the web app only when executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()