MrSimple07 committed on
Commit
8fefe0f
·
1 Parent(s): 31cf81a

fixed more problems with the file uploading + processing

Browse files
Files changed (1) hide show
  1. document_processor.py +25 -2
document_processor.py CHANGED
@@ -9,6 +9,7 @@ from llama_index.core.retrievers import VectorIndexRetriever
9
  from llama_index.core.response_synthesizers import get_response_synthesizer, ResponseMode
10
  from llama_index.core.prompts import PromptTemplate
11
  from config import *
 
12
 
13
  def log_message(message):
14
  print(message, flush=True)
@@ -41,6 +42,16 @@ def process_uploaded_file(file_path, file_name, doc_name, doc_link):
41
  try:
42
  log_message(f"🔄 Processing file: {file_name}")
43
 
 
 
 
 
 
 
 
 
 
 
44
  file_extension = Path(file_path).suffix.lower()
45
 
46
  if file_extension == '.pdf':
@@ -72,12 +83,24 @@ def process_uploaded_file(file_path, file_name, doc_name, doc_link):
72
 
73
  def get_existing_documents():
74
  try:
 
75
  chunks_csv_path = os.path.join(download_dir, chunks_filename)
76
  if os.path.exists(chunks_csv_path):
77
  chunks_df = pd.read_csv(chunks_csv_path)
78
- if not chunks_df.empty:
79
  unique_docs = chunks_df['document_name'].unique()
80
- return sorted(unique_docs.tolist())
 
 
 
 
 
 
 
 
 
 
 
81
  return []
82
  except Exception as e:
83
  log_message(f"❌ Error reading documents: {str(e)}")
 
9
  from llama_index.core.response_synthesizers import get_response_synthesizer, ResponseMode
10
  from llama_index.core.prompts import PromptTemplate
11
  from config import *
12
+ import shutil
13
 
14
  def log_message(message):
15
  print(message, flush=True)
 
42
  try:
43
  log_message(f"🔄 Processing file: {file_name}")
44
 
45
+ # Create upload directory if it doesn't exist
46
+ upload_dir = "UPLOADED_DOCUMENTS"
47
+ os.makedirs(upload_dir, exist_ok=True)
48
+
49
+ # Copy uploaded file to permanent location
50
+ permanent_file_path = os.path.join(upload_dir, file_name)
51
+ if os.path.abspath(file_path) != os.path.abspath(permanent_file_path):
52
+ shutil.copy2(file_path, permanent_file_path)
53
+ log_message(f"📁 File saved to: {permanent_file_path}")
54
+
55
  file_extension = Path(file_path).suffix.lower()
56
 
57
  if file_extension == '.pdf':
 
83
 
84
  def get_existing_documents():
85
  try:
86
+ # First check CSV file for processed documents
87
  chunks_csv_path = os.path.join(download_dir, chunks_filename)
88
  if os.path.exists(chunks_csv_path):
89
  chunks_df = pd.read_csv(chunks_csv_path)
90
+ if not chunks_df.empty and 'document_name' in chunks_df.columns:
91
  unique_docs = chunks_df['document_name'].unique()
92
+ return sorted([doc for doc in unique_docs if pd.notna(doc)])
93
+
94
+ # Fallback to checking uploaded files directory
95
+ upload_dir = "UPLOADED_DOCUMENTS"
96
+ if os.path.exists(upload_dir):
97
+ documents = []
98
+ for file_name in os.listdir(upload_dir):
99
+ if file_name.endswith(('.txt', '.pdf')):
100
+ doc_name = os.path.splitext(file_name)[0]
101
+ documents.append(doc_name)
102
+ return sorted(documents)
103
+
104
  return []
105
  except Exception as e:
106
  log_message(f"❌ Error reading documents: {str(e)}")