Spaces:
Configuration error
Configuration error
File size: 2,062 Bytes
42da79c | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 | import os
import nltk
import gradio as gr
from documents import process_docx, process_pdf, process_txt
from indexing import index_document
from querying import query_documents
import preprocess
# Check for each NLTK data package once at startup and download only the
# ones that are not already installed locally.
for _resource_path, _package in (
    ("corpora/wordnet", "wordnet"),
    ("corpora/stopwords", "stopwords"),
    ("tokenizers/punkt", "punkt"),
):
    try:
        nltk.data.find(_resource_path)
    except LookupError:
        # nltk.data.find raises LookupError when the resource is missing.
        nltk.download(_package)

# Directory for uploads; created at import time if it does not exist yet.
UPLOAD_FOLDER = 'uploads'
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
def process_and_query(file, query_text):
    """
    Process an uploaded document, index it, and run a query.

    This is the main function called by the Gradio interface.

    Args:
        file: The upload handed over by Gradio. Either an object exposing
            the file's path as ``.name`` or the path string itself. A falsy
            value (nothing uploaded) is reported rather than processed.
        query_text: The query passed on to ``query_documents``.

    Returns:
        A ``(status_message, query_results)`` tuple. When no file was
        uploaded or the extension is not supported, the message states the
        problem and the results are an empty list.
    """
    if not file:
        return "No file uploaded", []

    # Gradio may deliver a temp-file-like object (path in ``.name``) or,
    # depending on the File component's type/version, the path string
    # itself; accept both instead of failing on a missing attribute.
    file_path = getattr(file, "name", file)

    # Match the extension case-insensitively so e.g. "REPORT.PDF" works.
    lowered_path = file_path.lower()
    if lowered_path.endswith('.docx'):
        text = process_docx(file_path)
    elif lowered_path.endswith('.pdf'):
        text = process_pdf(file_path)
    elif lowered_path.endswith('.txt'):
        text = process_txt(file_path)
    else:
        return "Unsupported file type", []

    # The extractor result is indexed by 'text' to get the raw content.
    preprocessed_text = preprocess.preprocess_text(text['text'])

    # Index the document under its path in the "documents" index, then
    # query that same index.
    index_result = index_document("documents", file_path, preprocessed_text)
    query_results = query_documents("documents", query_text)

    return f"Indexing result: {index_result}", query_results
# Gradio Interface: a file upload plus a query box in, an indexing status
# message plus the query results out.
_input_components = [
    gr.File(label="Upload Document"),
    gr.Textbox(label="Enter Query"),
]
_output_components = [
    gr.Textbox(label="Indexing Result"),
    gr.JSON(label="Query Results"),  # query results are rendered as JSON
]

iface = gr.Interface(
    fn=process_and_query,
    inputs=_input_components,
    outputs=_output_components,
    title="Document Processing and Query",
    description="Upload a document (docx, pdf, or txt), enter a query, and get the results.",
)

# Only start the web UI when this file is executed as a script.
if __name__ == '__main__':
    iface.launch()
|