Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -72,7 +72,13 @@ class EmbeddingModel:
|
|
| 72 |
# Embed the list of chunks
|
| 73 |
return self.model.embed_documents(chunks)
|
| 74 |
|
| 75 |
-
def process_files(model_name, split_strategy, chunk_size
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
# File processing
|
| 77 |
text = ""
|
| 78 |
for file in os.listdir(FILES_DIR):
|
|
@@ -90,7 +96,7 @@ def process_files(model_name, split_strategy, chunk_size=500, overlap_size=50, m
|
|
| 90 |
# Embed chunks, not the full text
|
| 91 |
model = EmbeddingModel(MODELS[model_name], max_tokens=max_tokens)
|
| 92 |
embeddings = model.embed(chunks)
|
| 93 |
-
|
| 94 |
return embeddings, chunks
|
| 95 |
|
| 96 |
def search_embeddings(query, model_name, top_k):
|
|
@@ -133,7 +139,15 @@ def upload_file(file, model_name, split_strategy, overlap_size,chunk_size, max_
|
|
| 133 |
overlap_size = int(overlap_size) # Convert to int if valid
|
| 134 |
except ValueError:
|
| 135 |
return {"error": "Chunk size and overlap size must be valid integers."}
|
| 136 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
|
| 138 |
# Handle file upload using Gradio file object
|
| 139 |
file_path = file.name # Get the file path from Gradio file object
|
|
|
|
| 72 |
# Embed the list of chunks
|
| 73 |
return self.model.embed_documents(chunks)
|
| 74 |
|
| 75 |
+
def process_files(model_name, split_strategy, chunk_size, overlap_size, max_tokens):
|
| 76 |
+
print('-----mmm--------')
|
| 77 |
+
print(model_name)
|
| 78 |
+
print(split_strategy)
|
| 79 |
+
print(overlap_size)
|
| 80 |
+
print(chunk_size)
|
| 81 |
+
print(max_tokens)
|
| 82 |
# File processing
|
| 83 |
text = ""
|
| 84 |
for file in os.listdir(FILES_DIR):
|
|
|
|
| 96 |
# Embed chunks, not the full text
|
| 97 |
model = EmbeddingModel(MODELS[model_name], max_tokens=max_tokens)
|
| 98 |
embeddings = model.embed(chunks)
|
| 99 |
+
print(chunks)
|
| 100 |
return embeddings, chunks
|
| 101 |
|
| 102 |
def search_embeddings(query, model_name, top_k):
|
|
|
|
| 139 |
overlap_size = int(overlap_size) # Convert to int if valid
|
| 140 |
except ValueError:
|
| 141 |
return {"error": "Chunk size and overlap size must be valid integers."}
|
| 142 |
+
print('-------------')
|
| 143 |
+
print(file.name)
|
| 144 |
+
print(model_name)
|
| 145 |
+
print(split_strategy)
|
| 146 |
+
print(overlap_size)
|
| 147 |
+
print(chunk_size)
|
| 148 |
+
print(max_tokens)
|
| 149 |
+
print(query)
|
| 150 |
+
print(top_k)
|
| 151 |
|
| 152 |
# Handle file upload using Gradio file object
|
| 153 |
file_path = file.name # Get the file path from Gradio file object
|