menikev committed on
Commit
a5640e9
·
verified ·
1 Parent(s): 665656b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -4
app.py CHANGED
"""App startup: load the persisted Chroma vector DB and build a retriever.

Expects the vector store produced by ``complete_ingestion.py`` under
``data/processed/vector_db``; exits with status 1 if it is missing.
"""
import os
from pathlib import Path
import gradio as gr

from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain_community.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain_community.vectorstores import Chroma
from langchain_huggingface import HuggingFaceEmbeddings

# Load the vector DB created by complete_ingestion.py
PERSIST_DIR = Path("data/processed/vector_db")

# Fail fast (before model downloads) when the ingestion output is absent
# or the directory exists but is empty.
if not PERSIST_DIR.exists() or not any(PERSIST_DIR.iterdir()):
    print("⚠️ Vector DB not found. Run complete_ingestion.py first.")
    raise SystemExit(1)

# Embedding model and collection name are env-overridable (defaults match
# the values used by complete_ingestion.py), mirroring the LLM_ID pattern.
EMBEDDING_ID = os.getenv("EMBEDDING_ID", "BAAI/bge-small-en")
COLLECTION_NAME = os.getenv("COLLECTION_NAME", "legal_documents")

embedding_model = HuggingFaceEmbeddings(model_name=EMBEDDING_ID)
# NOTE: the embedding model here must match the one used at ingestion time,
# otherwise similarity search returns meaningless neighbors.
vectordb = Chroma(
    persist_directory=str(PERSIST_DIR),
    embedding_function=embedding_model,
    collection_name=COLLECTION_NAME,
)

# Top-3 nearest chunks per query.
retriever = vectordb.as_retriever(search_kwargs={"k": 3})

# Load lightweight conversational model
MODEL_ID = os.getenv("LLM_ID", "TinyLlama/TinyLlama-1.1B-Chat-v1.0")