Update app.py
Browse files
app.py
CHANGED
|
@@ -10,14 +10,18 @@ import numpy as np
|
|
| 10 |
# Initialize Groq API
|
| 11 |
client = Groq(api_key=os.getenv("GROQ_API_KEY"))
|
| 12 |
|
| 13 |
-
#
|
|
|
|
|
|
|
|
|
|
| 14 |
def list_pdf_files():
|
| 15 |
-
pdf_files = [f for f in os.listdir(
|
| 16 |
return pdf_files
|
| 17 |
|
| 18 |
# Function to extract text and split into chunks using pdfplumber
|
| 19 |
-
def extract_text_from_pdf(
|
| 20 |
try:
|
|
|
|
| 21 |
full_text = ""
|
| 22 |
with pdfplumber.open(file_path) as pdf:
|
| 23 |
for page in pdf.pages:
|
|
@@ -48,8 +52,8 @@ def retrieve_relevant_chunks(chunks, question):
|
|
| 48 |
return ""
|
| 49 |
|
| 50 |
# Function to answer the question using selected relevant chunks
|
| 51 |
-
def answer_question(
|
| 52 |
-
chunks = extract_text_from_pdf(
|
| 53 |
if not chunks:
|
| 54 |
return "Error: Could not extract text from PDF."
|
| 55 |
|
|
|
|
| 10 |
# Initialize Groq API
|
| 11 |
client = Groq(api_key=os.getenv("GROQ_API_KEY"))
|
| 12 |
|
| 13 |
+
# Path to the directory containing PDFs in Hugging Face Space
|
| 14 |
+
PDF_DIR = "./Legal2/main"
|
| 15 |
+
|
| 16 |
+
# Function to list PDF files in the Hugging Face Space directory
|
| 17 |
def list_pdf_files():
|
| 18 |
+
pdf_files = [f for f in os.listdir(PDF_DIR) if f.endswith('.pdf')]
|
| 19 |
return pdf_files
|
| 20 |
|
| 21 |
# Function to extract text and split into chunks using pdfplumber
|
| 22 |
+
def extract_text_from_pdf(file_name):
|
| 23 |
try:
|
| 24 |
+
file_path = os.path.join(PDF_DIR, file_name)
|
| 25 |
full_text = ""
|
| 26 |
with pdfplumber.open(file_path) as pdf:
|
| 27 |
for page in pdf.pages:
|
|
|
|
| 52 |
return ""
|
| 53 |
|
| 54 |
# Function to answer the question using selected relevant chunks
|
| 55 |
+
def answer_question(file_name, question):
|
| 56 |
+
chunks = extract_text_from_pdf(file_name)
|
| 57 |
if not chunks:
|
| 58 |
return "Error: Could not extract text from PDF."
|
| 59 |
|