Spaces:
Runtime error
Runtime error
Commit ·
c66410d
1
Parent(s): d878772
Change max_length
Browse files
- .gitignore +3 -0
- populate_db.py +3 -3
.gitignore
CHANGED
|
@@ -223,3 +223,6 @@ secrets/
|
|
| 223 |
|
| 224 |
# Docker volumes (production)
|
| 225 |
volumes/
|
|
|
|
|
|
|
|
|
|
|
|
| 223 |
|
| 224 |
# Docker volumes (production)
|
| 225 |
volumes/
|
| 226 |
+
|
| 227 |
+
simple_analysis.py
|
| 228 |
+
# This file is used for simple analysis of the codebase, such as checking for unused imports or variables.
|
populate_db.py
CHANGED
|
@@ -39,7 +39,7 @@ def create_collection():
|
|
| 39 |
schema = milvus_client.create_schema(auto_id=False, enable_dynamic_field=False)
|
| 40 |
schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True)
|
| 41 |
schema.add_field(field_name="vector", datatype=DataType.FLOAT_VECTOR, dim=4096) # Qwen/Qwen3-Embedding-8B dimension
|
| 42 |
-
schema.add_field(field_name="text", datatype=DataType.VARCHAR) #
|
| 43 |
schema.add_field(field_name="metadata", datatype=DataType.JSON)
|
| 44 |
|
| 45 |
# Create index for vector search
|
|
@@ -82,8 +82,8 @@ def main():
|
|
| 82 |
for i, doc in enumerate(docs):
|
| 83 |
# Check text length and truncate if necessary
|
| 84 |
text_content = doc.page_content
|
| 85 |
-
if len(text_content) >
|
| 86 |
-
text_content = text_content[:
|
| 87 |
print(f"Document {i+1} truncated from {len(doc.page_content)} to {len(text_content)} characters")
|
| 88 |
|
| 89 |
# Generate embedding for the document content
|
|
|
|
| 39 |
schema = milvus_client.create_schema(auto_id=False, enable_dynamic_field=False)
|
| 40 |
schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True)
|
| 41 |
schema.add_field(field_name="vector", datatype=DataType.FLOAT_VECTOR, dim=4096) # Qwen/Qwen3-Embedding-8B dimension
|
| 42 |
+
schema.add_field(field_name="text", datatype=DataType.VARCHAR, max_length=65536) # 64KB max
|
| 43 |
schema.add_field(field_name="metadata", datatype=DataType.JSON)
|
| 44 |
|
| 45 |
# Create index for vector search
|
|
|
|
| 82 |
for i, doc in enumerate(docs):
|
| 83 |
# Check text length and truncate if necessary
|
| 84 |
text_content = doc.page_content
|
| 85 |
+
if len(text_content) > 65000: # Leave some buffer below 64KB limit
|
| 86 |
+
text_content = text_content[:65000]
|
| 87 |
print(f"Document {i+1} truncated from {len(doc.page_content)} to {len(text_content)} characters")
|
| 88 |
|
| 89 |
# Generate embedding for the document content
|