MrSimple07 committed on
Commit
79d5a5c
·
1 Parent(s): f6a9f63

eski holat

Browse files
Files changed (3) hide show
  1. app.py +1 -0
  2. index_retriever.py +1 -26
  3. utils.py +6 -0
app.py CHANGED
@@ -103,6 +103,7 @@ def initialize_system(repo_id, hf_token, download_dir, chunks_filename=None,
103
  from llama_index.core.text_splitter import TokenTextSplitter
104
 
105
  embed_model = get_embedding_model()
 
106
  llm = get_llm_model(DEFAULT_MODEL)
107
  reranker = get_reranker_model()
108
 
 
103
  from llama_index.core.text_splitter import TokenTextSplitter
104
 
105
  embed_model = get_embedding_model()
106
+
107
  llm = get_llm_model(DEFAULT_MODEL)
108
  reranker = get_reranker_model()
109
 
index_retriever.py CHANGED
@@ -12,32 +12,7 @@ def create_vector_index(documents):
12
  log_message("Строю векторный индекс")
13
  return VectorStoreIndex.from_documents(documents)
14
 
15
- def deduplicate_nodes(nodes):
16
- """Deduplicate retrieved nodes based on unique identifiers"""
17
- seen = set()
18
- unique_nodes = []
19
-
20
- for node in nodes:
21
- # Create unique identifier from metadata
22
- doc_id = node.metadata.get('document_id', '')
23
- section_id = node.metadata.get('section_id', '')
24
- chunk_id = node.metadata.get('chunk_id', 0)
25
- node_type = node.metadata.get('type', 'text')
26
-
27
- if node_type == 'table':
28
- table_num = node.metadata.get('table_number', '')
29
- identifier = f"{doc_id}|table|{table_num}|{chunk_id}"
30
- elif node_type == 'image':
31
- img_num = node.metadata.get('image_number', '')
32
- identifier = f"{doc_id}|image|{img_num}"
33
- else:
34
- identifier = f"{doc_id}|{section_id}|{chunk_id}"
35
-
36
- if identifier not in seen:
37
- seen.add(identifier)
38
- unique_nodes.append(node)
39
-
40
- return unique_nodes
41
 
42
  def create_query_engine(vector_index):
43
  try:
 
12
  log_message("Строю векторный индекс")
13
  return VectorStoreIndex.from_documents(documents)
14
 
15
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
  def create_query_engine(vector_index):
18
  try:
utils.py CHANGED
@@ -2,6 +2,12 @@ from llama_index.llms.google_genai import GoogleGenAI
2
  from llama_index.embeddings.huggingface import HuggingFaceEmbedding
3
  from sentence_transformers import CrossEncoder
4
  from my_logging import log_message
 
 
 
 
 
 
5
 
6
  def get_llm_model(api_key, model_name="gemini-2.0-flash"):
7
  return GoogleGenAI(model=model_name, api_key=api_key)
 
2
  from llama_index.embeddings.huggingface import HuggingFaceEmbedding
3
  from sentence_transformers import CrossEncoder
4
  from my_logging import log_message
5
+ import os
6
+
7
+ api_key = os.getenv('GOOGLE_API_KEY') # or however you're loading it
8
+ if not api_key:
9
+ raise ValueError("GOOGLE_API_KEY not found in environment")
10
+
11
 
12
  def get_llm_model(api_key, model_name="gemini-2.0-flash"):
13
  return GoogleGenAI(model=model_name, api_key=api_key)