Spaces:

MrSimple01
/

RAG_AIEXP_01

Sleeping

App Files Files Community

MrSimple07 committed on Aug 14, 2025

Commit

8fefe0f

1 Parent(s): 31cf81a

Fixed more problems with file uploading and processing

Browse files

Files changed (1) hide show

document_processor.py +25 -2

document_processor.py CHANGED Viewed

@@ -9,6 +9,7 @@ from llama_index.core.retrievers import VectorIndexRetriever
 from llama_index.core.response_synthesizers import get_response_synthesizer, ResponseMode
 from llama_index.core.prompts import PromptTemplate
 from config import *
 def log_message(message):
     print(message, flush=True)
@@ -41,6 +42,16 @@ def process_uploaded_file(file_path, file_name, doc_name, doc_link):
     try:
         log_message(f"🔄 Processing file: {file_name}")
         file_extension = Path(file_path).suffix.lower()
         if file_extension == '.pdf':
@@ -72,12 +83,24 @@ def process_uploaded_file(file_path, file_name, doc_name, doc_link):
 def get_existing_documents():
     try:
         chunks_csv_path = os.path.join(download_dir, chunks_filename)
         if os.path.exists(chunks_csv_path):
             chunks_df = pd.read_csv(chunks_csv_path)
-            if not chunks_df.empty:
                 unique_docs = chunks_df['document_name'].unique()
-                return sorted(unique_docs.tolist())
         return []
     except Exception as e:
         log_message(f"❌ Error reading documents: {str(e)}")

 from llama_index.core.response_synthesizers import get_response_synthesizer, ResponseMode
 from llama_index.core.prompts import PromptTemplate
 from config import *
+import shutil
 def log_message(message):
     print(message, flush=True)
     try:
         log_message(f"🔄 Processing file: {file_name}")
+        # Create upload directory if it doesn't exist
+        upload_dir = "UPLOADED_DOCUMENTS"
+        os.makedirs(upload_dir, exist_ok=True)
+        # Copy uploaded file to permanent location
+        permanent_file_path = os.path.join(upload_dir, file_name)
+        if os.path.abspath(file_path) != os.path.abspath(permanent_file_path):
+            shutil.copy2(file_path, permanent_file_path)
+            log_message(f"📁 File saved to: {permanent_file_path}")
         file_extension = Path(file_path).suffix.lower()
         if file_extension == '.pdf':
 def get_existing_documents():
     try:
+        # First check CSV file for processed documents
         chunks_csv_path = os.path.join(download_dir, chunks_filename)
         if os.path.exists(chunks_csv_path):
             chunks_df = pd.read_csv(chunks_csv_path)
+            if not chunks_df.empty and 'document_name' in chunks_df.columns:
                 unique_docs = chunks_df['document_name'].unique()
+                return sorted([doc for doc in unique_docs if pd.notna(doc)])
+        # Fallback to checking uploaded files directory
+        upload_dir = "UPLOADED_DOCUMENTS"
+        if os.path.exists(upload_dir):
+            documents = []
+            for file_name in os.listdir(upload_dir):
+                if file_name.endswith(('.txt', '.pdf')):
+                    doc_name = os.path.splitext(file_name)[0]
+                    documents.append(doc_name)
+            return sorted(documents)
         return []
     except Exception as e:
         log_message(f"❌ Error reading documents: {str(e)}")