Mishal23 committed on
Commit b8f71c2 · verified
1 Parent(s): 65afe01

Update app.py

Files changed (1):
  1. app.py (+10 -38)
app.py CHANGED
@@ -1,47 +1,21 @@
- import json
+ # app.py
+
+ import os
  import gradio as gr
- from langchain.text_splitter import RecursiveCharacterTextSplitter
- from langchain.vectorstores import FAISS
- from langchain.embeddings import HuggingFaceEmbeddings
- from langchain.schema import Document
+ from langchain_community.vectorstores import FAISS
+ from langchain_community.embeddings import HuggingFaceEmbeddings
  from huggingface_hub import InferenceClient
- import os
-
- # ✅ Step 1: Load and Chunk JSON with Metadata
- file_path = "pdf_data.json"
- documents = []
-
- splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=50)
 
- try:
-     with open(file_path, "r", encoding="utf-8") as f:
-         data = json.load(f)
-         for item in data:
-             if "text" in item:
-                 section = "PPC" if "punishment" in item["text"].lower() or "section" in item["text"].lower() else "other"
-                 law_type = "criminal" if section == "PPC" else "general"
-                 chunks = splitter.split_text(item["text"])
-                 for chunk in chunks:
-                     documents.append(Document(
-                         page_content=chunk,
-                         metadata={"section": section, "law_type": law_type}
-                     ))
- except Exception as e:
-     print(f"❌ Failed to load: {e}")
-
- print(f"✅ Loaded {len(documents)} chunks with metadata")
-
- # ✅ Step 2: Create Embeddings & FAISS Vector Store
+ # Load FAISS index and embedding model
  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
- db = FAISS.from_documents(documents, embedding_model)
+ db = FAISS.load_local("faiss_index", embedding_model)
 
- # ✅ Step 3: Load Zephyr-7B via Hugging Face Inference API
+ # Load Hugging Face Inference API client
  client = InferenceClient(
      model="HuggingFaceH4/zephyr-7b-beta",
-     token=os.getenv("HF_TOKEN")  # set your token in environment variable
+     token=os.getenv("HF_TOKEN")  # Make sure this is set in your environment
  )
 
- # ✅ Step 4: QA Function using chat_completion with formatting
  def ask_law_bot(query):
      try:
          results = db.similarity_search(query, k=5, filter={"section": "PPC"})
@@ -56,10 +30,8 @@ Respond to the question using the given legal context. Your answer must follow t
  - Reference relevant law sections like (section 220(b))
  - Be concise, clear, and avoid repetition
  - Use "YES" or "NO" if the question requires binary response
-
  Context:
  {context}
-
  Question: {query}
  Answer:"""
 
@@ -76,7 +48,7 @@ Answer:"""
      except Exception as e:
          return f"❌ Error: {e}"
 
- # ✅ Step 5: Gradio UI
+ # Gradio UI
  gr.Interface(
      fn=ask_law_bot,
      inputs=gr.Textbox(lines=2, placeholder="e.g., What is the punishment for theft?"),
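Note: after this commit, app.py no longer builds the vector store at startup; it expects a prebuilt FAISS index in a directory named faiss_index, which is not part of this commit. A minimal sketch of an offline build script (the file name build_index.py is hypothetical) that reuses the chunking and metadata logic removed from app.py and saves the index where the new code loads it could look like this:

```python
# build_index.py -- hypothetical offline script, not part of this commit.
# Rebuilds the chunking/metadata logic removed from app.py and saves the
# index that the new app.py loads via FAISS.load_local("faiss_index", ...).
import json

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=50)
documents = []

with open("pdf_data.json", "r", encoding="utf-8") as f:
    data = json.load(f)

for item in data:
    if "text" in item:
        text = item["text"]
        # Same heuristic tagging as the old app.py
        section = "PPC" if "punishment" in text.lower() or "section" in text.lower() else "other"
        law_type = "criminal" if section == "PPC" else "general"
        for chunk in splitter.split_text(text):
            documents.append(Document(
                page_content=chunk,
                metadata={"section": section, "law_type": law_type},
            ))

embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
db = FAISS.from_documents(documents, embedding_model)
db.save_local("faiss_index")  # directory name expected by the updated app.py
```

Depending on the installed langchain_community version, FAISS.load_local may also require allow_dangerous_deserialization=True when reading a locally saved index; if app.py fails at startup with a deserialization error, that flag (and re-saving the index with the same library version) is the first thing to check.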