Spaces:

MrSimple01
/

RAG_AIEXP_01

Sleeping

MrSimple07 committed on Oct 4, 2025

Commit

4775037

1 Parent(s): a33029f

new documents_prep

Files changed (2) hide show

app.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import gradio as gr
 import os
 from llama_index.core import Settings
-from documents_prep import load_json_documents, load_table_data, load_image_data, load_csv_chunks
 from utils import get_llm_model, get_embedding_model, get_reranker_model, answer_question
 from my_logging import log_message
 from index_retriever import create_vector_index, create_query_engine
@@ -127,11 +127,6 @@ def initialize_system(repo_id, hf_token, download_dir, chunks_filename=None,
             json_documents, json_chunk_info = load_json_documents(repo_id, hf_token, json_files_dir, download_dir)
             all_documents.extend(json_documents)
             chunk_info.extend(json_chunk_info)
-        else:
-            if chunks_filename:
-                log_message("Загружаем данные из CSV")
-                csv_documents, chunks_df = load_csv_chunks(repo_id, hf_token, chunks_filename, download_dir)
-                all_documents.extend(csv_documents)
         if table_data_dir:
             log_message("Добавляю табличные данные")

 import gradio as gr
 import os
 from llama_index.core import Settings
+from documents_prep import load_json_documents, load_table_data, load_image_data
 from utils import get_llm_model, get_embedding_model, get_reranker_model, answer_question
 from my_logging import log_message
 from index_retriever import create_vector_index, create_query_engine
             json_documents, json_chunk_info = load_json_documents(repo_id, hf_token, json_files_dir, download_dir)
             all_documents.extend(json_documents)
             chunk_info.extend(json_chunk_info)
         if table_data_dir:
             log_message("Добавляю табличные данные")

documents_prep.py CHANGED Viewed

@@ -123,7 +123,7 @@ def chunk_table_document(doc):
 # TABLE DATA LOADING
 # ============================================================================
-def create_table_text(table_data):
     """Format table data as readable text"""
     doc_id = table_data.get('document_id', table_data.get('document', 'Неизвестно'))
     table_num = table_data.get('table_number', 'Неизвестно')
@@ -190,7 +190,7 @@ def load_tables_from_json(repo_id, hf_token, table_data_dir):
                     continue
                 # Create table text
-                table_text = create_table_text(sheet)
                 table_size = len(table_text)
                 table_num = sheet.get('table_number', 'unknown')
@@ -434,7 +434,7 @@ def load_json_documents(repo_id, hf_token, json_files_dir, download_dir):
 # IMAGE DATA LOADING
 # ============================================================================
-def load_image_documents(repo_id, hf_token, image_data_dir):
     """Load image metadata from CSV files"""
     log_message("=" * 60)
     log_message("LOADING IMAGE METADATA")

 # TABLE DATA LOADING
 # ============================================================================
+def load_table_data(table_data):
     """Format table data as readable text"""
     doc_id = table_data.get('document_id', table_data.get('document', 'Неизвестно'))
     table_num = table_data.get('table_number', 'Неизвестно')
                     continue
                 # Create table text
+                table_text = load_table_data(sheet)
                 table_size = len(table_text)
                 table_num = sheet.get('table_number', 'unknown')
 # IMAGE DATA LOADING
 # ============================================================================
+def load_image_data(repo_id, hf_token, image_data_dir):
     """Load image metadata from CSV files"""
     log_message("=" * 60)
     log_message("LOADING IMAGE METADATA")