Spaces:
Runtime error
Runtime error
| import os | |
| import nltk | |
| import gradio as gr | |
| from documents import process_docx, process_pdf, process_txt | |
| from indexing import index_document | |
| from querying import query_documents | |
| import preprocess | |
# Make sure the WordNet, stopwords and punkt NLTK resources are present when
# the module is loaded. Each pair is (path probed with nltk.data.find, package
# name handed to nltk.download when the probe raises LookupError).
for _lookup_path, _package_name in (
    ("corpora/wordnet", "wordnet"),
    ("corpora/stopwords", "stopwords"),
    ("tokenizers/punkt", "punkt"),
):
    try:
        nltk.data.find(_lookup_path)
    except LookupError:
        nltk.download(_package_name)

# The folder is created if it does not exist yet; exist_ok=True makes this a
# no-op on later starts. (The visible code does not reference it again.)
UPLOAD_FOLDER = 'uploads'
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
def process_and_query(file, query_text):
    """Extract text from an uploaded document, index it, and run a query.

    This is the callback wired into the Gradio interface.

    Args:
        file: The uploaded document. Either an object exposing the file's
            path as ``.name`` (e.g. a temporary-file wrapper) or a plain
            ``str`` / ``os.PathLike`` path. A falsy value means nothing was
            uploaded.
        query_text: The query string forwarded to ``query_documents``.

    Returns:
        A ``(status_message, query_results)`` tuple. When no file is given or
        the extension is not .docx/.pdf/.txt, ``query_results`` is an empty
        list and ``status_message`` explains why.
    """
    if not file:
        return "No file uploaded", []

    # Accept both path-like values and file objects. Path-likes are checked
    # first because pathlib.Path also has a ``.name`` attribute, which holds
    # only the final path component rather than the full path.
    if isinstance(file, (str, os.PathLike)):
        file_path = os.fspath(file)
    else:
        file_path = file.name

    # Compare extensions case-insensitively so "REPORT.PDF" is handled the
    # same way as "report.pdf".
    lowered_path = file_path.lower()
    if lowered_path.endswith('.docx'):
        text = process_docx(file_path)
    elif lowered_path.endswith('.pdf'):
        text = process_pdf(file_path)
    elif lowered_path.endswith('.txt'):
        text = process_txt(file_path)
    else:
        return "Unsupported file type", []

    # The process_* helpers return a mapping whose 'text' entry holds the
    # extracted document text.
    preprocessed_text = preprocess.preprocess_text(text['text'])

    # Index the document under its path. "documents" is presumably the name
    # of the target index/collection -- confirm against indexing.py.
    index_result = index_document("documents", file_path, preprocessed_text)

    # Query the same "documents" target that was just indexed into.
    query_results = query_documents("documents", query_text)

    return f"Indexing result: {index_result}", query_results
def _build_interface():
    """Create the Gradio interface that exposes process_and_query."""
    # Inputs, in the order process_and_query receives them.
    upload_input = gr.File(label="Upload Document")
    query_input = gr.Textbox(label="Enter Query")

    # Outputs, in the order process_and_query returns them; the query results
    # are rendered as JSON.
    status_output = gr.Textbox(label="Indexing Result")
    results_output = gr.JSON(label="Query Results")

    return gr.Interface(
        fn=process_and_query,
        inputs=[upload_input, query_input],
        outputs=[status_output, results_output],
        title="Document Processing and Query",
        description="Upload a document (docx, pdf, or txt), enter a query, and get the results.",
    )


# Module-level interface object, built at import time.
iface = _build_interface()

# Start the app only when this file is run as a script.
if __name__ == '__main__':
    iface.launch()