Update app.py
Browse files
app.py
CHANGED
|
@@ -9,12 +9,16 @@ import gradio as gr
|
|
| 9 |
import os
|
| 10 |
import pytesseract
|
| 11 |
from PIL import Image
|
| 12 |
-
|
| 13 |
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
|
| 14 |
index = faiss.read_index('IPC_index.faiss')
|
| 15 |
index2 = faiss.read_index('CrpC_index.faiss')
|
| 16 |
-
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
# Step 3: Retrieval with Citations using PDF filename
|
| 19 |
def retrieve_info_with_citation(query, top_k=5):
|
| 20 |
query_embedding = model.encode([query])
|
|
@@ -24,7 +28,10 @@ def retrieve_info_with_citation(query, top_k=5):
|
|
| 24 |
for i in range(min(top_k, len(I[0]))):
|
| 25 |
if D[0][i] < 1.0: # Relevance threshold
|
| 26 |
chunk_index = I[0][i]
|
| 27 |
-
|
|
|
|
|
|
|
|
|
|
| 28 |
results.append((match, citation))
|
| 29 |
else:
|
| 30 |
break
|
|
@@ -37,13 +44,16 @@ def retrieve_info_with_citation(query, top_k=5):
|
|
| 37 |
|
| 38 |
def retrieve_info_with_citation2(query, top_k=5):
|
| 39 |
query_embedding = model.encode([query])
|
| 40 |
-
D, I =
|
| 41 |
|
| 42 |
results = []
|
| 43 |
for i in range(min(top_k, len(I[0]))):
|
| 44 |
if D[0][i] < 1.0: # Relevance threshold
|
| 45 |
chunk_index = I[0][i]
|
| 46 |
-
|
|
|
|
|
|
|
|
|
|
| 47 |
results.append((match, citation))
|
| 48 |
else:
|
| 49 |
break
|
|
@@ -63,6 +73,7 @@ def retrieve_info2(query):
|
|
| 63 |
formatted_results = "\n\n".join([f"{i+1}. {match}\n{citation}" for i, (match, citation) in enumerate(results)])
|
| 64 |
return formatted_results
|
| 65 |
|
|
|
|
| 66 |
ipc_tool = Tool(
|
| 67 |
name="IPC Information Retrieval",
|
| 68 |
func=retrieve_info,
|
|
|
|
| 9 |
import os
|
| 10 |
import pytesseract
|
| 11 |
from PIL import Image
|
| 12 |
+
import pickle
|
| 13 |
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
|
| 14 |
index = faiss.read_index('IPC_index.faiss')
|
| 15 |
index2 = faiss.read_index('CrpC_index.faiss')
|
| 16 |
+
# pickle.load() expects an open binary file object, not a path string;
# passing 'IPC_F' directly raises TypeError at import time.
# NOTE(review): unpickling can execute arbitrary code — only ship trusted artifacts.
# Presumably holds the IPC text chunks looked up by FAISS hit position — confirm.
with open('IPC_F', 'rb') as _fh:
    flattened_data = pickle.load(_fh)
|
| 17 |
+
# pickle.load() expects an open binary file object, not a path string;
# passing 'IPC_N' directly raises TypeError at import time.
# NOTE(review): unpickling can execute arbitrary code — only ship trusted artifacts.
# Presumably maps each IPC chunk position to its source PDF filename — confirm.
with open('IPC_N', 'rb') as _fh:
    pdf_filenames = pickle.load(_fh)
|
| 18 |
+
# pickle.load() expects an open binary file object, not a path string;
# passing 'IPC_C' directly raises TypeError at import time.
# NOTE(review): unpickling can execute arbitrary code — only ship trusted artifacts.
# Presumably the zero-based per-PDF chunk number for each IPC chunk — confirm.
with open('IPC_C', 'rb') as _fh:
    chunk_indices = pickle.load(_fh)
|
| 19 |
+
# pickle.load() expects an open binary file object, not a path string;
# passing 'CrPC_F' directly raises TypeError at import time.
# NOTE(review): unpickling can execute arbitrary code — only ship trusted artifacts.
# Presumably holds the CrPC text chunks looked up by FAISS hit position — confirm.
with open('CrPC_F', 'rb') as _fh:
    flattened_data2 = pickle.load(_fh)
|
| 20 |
+
# pickle.load() expects an open binary file object, not a path string;
# passing 'CrPC_N' directly raises TypeError at import time.
# NOTE(review): unpickling can execute arbitrary code — only ship trusted artifacts.
# Presumably maps each CrPC chunk position to its source PDF filename — confirm.
with open('CrPC_N', 'rb') as _fh:
    pdf_filenames2 = pickle.load(_fh)
|
| 21 |
+
# pickle.load() expects an open binary file object, not a path string;
# passing 'CrPC_C' directly raises TypeError at import time.
# NOTE(review): unpickling can execute arbitrary code — only ship trusted artifacts.
# Presumably the zero-based per-PDF chunk number for each CrPC chunk — confirm.
with open('CrPC_C', 'rb') as _fh:
    chunk_indices2 = pickle.load(_fh)
|
| 22 |
# Step 3: Retrieval with Citations using PDF filename
|
| 23 |
def retrieve_info_with_citation(query, top_k=5):
|
| 24 |
query_embedding = model.encode([query])
|
|
|
|
| 28 |
for i in range(min(top_k, len(I[0]))):
|
| 29 |
if D[0][i] < 1.0: # Relevance threshold
|
| 30 |
chunk_index = I[0][i]
|
| 31 |
+
pdf_filename = pdf_filenames[chunk_index]
|
| 32 |
+
chunk_number = chunk_indices[chunk_index] + 1
|
| 33 |
+
match = flattened_data[chunk_index]
|
| 34 |
+
citation = f"Source: {pdf_filename}, Chunk: {chunk_number}"
|
| 35 |
results.append((match, citation))
|
| 36 |
else:
|
| 37 |
break
|
|
|
|
| 44 |
|
| 45 |
def retrieve_info_with_citation2(query, top_k=5):
|
| 46 |
query_embedding = model.encode([query])
|
| 47 |
+
D, I = index2.search(query_embedding, k=top_k)
|
| 48 |
|
| 49 |
results = []
|
| 50 |
for i in range(min(top_k, len(I[0]))):
|
| 51 |
if D[0][i] < 1.0: # Relevance threshold
|
| 52 |
chunk_index = I[0][i]
|
| 53 |
+
pdf_filename = pdf_filenames2[chunk_index]
|
| 54 |
+
chunk_number = chunk_indices2[chunk_index] + 1
|
| 55 |
+
match = flattened_data2[chunk_index]
|
| 56 |
+
citation = f"Source: {pdf_filename}, Chunk: {chunk_number}"
|
| 57 |
results.append((match, citation))
|
| 58 |
else:
|
| 59 |
break
|
|
|
|
| 73 |
formatted_results = "\n\n".join([f"{i+1}. {match}\n{citation}" for i, (match, citation) in enumerate(results)])
|
| 74 |
return formatted_results
|
| 75 |
|
| 76 |
+
|
| 77 |
ipc_tool = Tool(
|
| 78 |
name="IPC Information Retrieval",
|
| 79 |
func=retrieve_info,
|