Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,17 +2,29 @@ import os
|
|
| 2 |
from pathlib import Path
|
| 3 |
import gradio as gr
|
| 4 |
|
| 5 |
-
|
| 6 |
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
| 7 |
from langchain_community.llms import HuggingFacePipeline
|
| 8 |
from langchain.prompts import PromptTemplate
|
|
|
|
|
|
|
| 9 |
|
| 10 |
-
#
|
| 11 |
PERSIST_DIR = Path("data/processed/vector_db")
|
|
|
|
| 12 |
if not PERSIST_DIR.exists() or not any(PERSIST_DIR.iterdir()):
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
-
retriever = get_retriever()
|
| 16 |
|
| 17 |
# Load lightweight conversational model
|
| 18 |
MODEL_ID = os.getenv("LLM_ID", "TinyLlama/TinyLlama-1.1B-Chat-v1.0")
|
|
|
|
| 2 |
from pathlib import Path
|
| 3 |
import gradio as gr
|
| 4 |
|
| 5 |
+
|
| 6 |
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
| 7 |
from langchain_community.llms import HuggingFacePipeline
|
| 8 |
from langchain.prompts import PromptTemplate
|
| 9 |
+
from langchain_community.vectorstores import Chroma
|
| 10 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
| 11 |
|
| 12 |
+
# Load the vector DB created by complete_ingestion.py
|
| 13 |
PERSIST_DIR = Path("data/processed/vector_db")
|
| 14 |
+
|
| 15 |
if not PERSIST_DIR.exists() or not any(PERSIST_DIR.iterdir()):
|
| 16 |
+
print("⚠️ Vector DB not found. Run complete_ingestion.py first.")
|
| 17 |
+
raise SystemExit(1)
|
| 18 |
+
|
| 19 |
+
embedding_model = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en")
|
| 20 |
+
vectordb = Chroma(
|
| 21 |
+
persist_directory=str(PERSIST_DIR),
|
| 22 |
+
embedding_function=embedding_model,
|
| 23 |
+
collection_name="legal_documents"
|
| 24 |
+
)
|
| 25 |
+
|
| 26 |
+
retriever = vectordb.as_retriever(search_kwargs={"k": 3})
|
| 27 |
|
|
|
|
| 28 |
|
| 29 |
# Load lightweight conversational model
|
| 30 |
MODEL_ID = os.getenv("LLM_ID", "TinyLlama/TinyLlama-1.1B-Chat-v1.0")
|