Spaces:
Runtime error
Runtime error
| import json | |
| import os | |
| from dotenv import load_dotenv | |
| from qdrant_client import QdrantClient | |
| from qdrant_client.models import Distance, VectorParams, PointStruct | |
| from langchain_huggingface import HuggingFaceEmbeddings | |
| from langchain.schema import Document | |
# Name of the Qdrant collection that the functions in this script operate on.
collection_name = "documents"

# Pull variables from a local .env file into the process environment
# before the client below reads them.
load_dotenv()

# Qdrant client; URL and API key are taken from the environment.
qdrant = QdrantClient(
    url=os.getenv("QDRANT_URL"),
    api_key=os.getenv("QDRANT_SERVICE_KEY"),
    timeout=60,
)

# Sentence-embedding model, configured to run on the CPU.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/static-similarity-mrl-multilingual-v1",
    model_kwargs={"device": "cpu"},
)
def create_collection():
    """Create the Qdrant collection ``collection_name`` if it does not exist.

    The server is asked explicitly whether the collection exists. Treating
    every exception from a lookup as "collection missing" would hide
    connection, timeout and authentication failures behind a misleading
    creation attempt; with the explicit check those errors propagate to the
    caller unchanged.

    NOTE(review): relies on ``QdrantClient.collection_exists`` being available
    in the installed qdrant-client release -- confirm the pinned version.
    """
    if qdrant.collection_exists(collection_name):
        print(f"Colección '{collection_name}' ya existe")
        return

    print(f"Creando colección '{collection_name}'...")
    qdrant.create_collection(
        collection_name=collection_name,
        vectors_config=VectorParams(
            # Vector dimension; has to match the length of the vectors
            # produced by the embedding model used for the upload.
            size=1024,
            distance=Distance.COSINE,
        ),
    )
    print("Colección creada exitosamente")
def upload_embeddings_from_jsonl(file_path: str):
    """Embed the question/answer records of a JSONL file and upsert them into Qdrant.

    Every non-blank line of the file is parsed as a JSON object that must
    provide the keys ``Question``, ``Final answer`` and ``task_id``. For each
    record a text of the form ``Question : ...\\n\\nFinal answer : ...`` is
    embedded and stored as one point whose payload holds that text under
    ``page_content`` and the task id under ``metadata.source``. Point ids are
    the positions of the records among the non-blank lines of the file.

    A failure while building or upserting the points is reported with a
    printed message and is not re-raised, so callers cannot detect it from
    the return value.

    Args:
        file_path: Path of the JSONL file to read.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks one of the required keys.
    """
    # Read as UTF-8 explicitly instead of depending on the platform's default
    # encoding, and skip blank lines (e.g. a trailing newline at end of file)
    # which json.loads would reject.
    with open(file_path, "r", encoding="utf-8") as jsonl_file:
        samples = [json.loads(line) for line in jsonl_file if line.strip()]

    docs = []
    for sample in samples:
        content = f"Question : {sample['Question']}\n\nFinal answer : {sample['Final answer']}"
        docs.append(
            {
                "page_content": content,
                "metadata": {"source": sample["task_id"]},
                "embedding": embeddings.embed_query(content),
            }
        )

    print(f"Subiendo {len(docs)} documentos a Qdrant...")
    try:
        points = [
            PointStruct(
                id=idx,
                vector=doc["embedding"],
                payload={
                    "page_content": doc["page_content"],
                    "metadata": doc["metadata"],
                },
            )
            for idx, doc in enumerate(docs)
        ]
        # wait=True is passed so the call returns the operation result
        # that is printed below.
        response = qdrant.upsert(
            collection_name=collection_name,
            points=points,
            wait=True,
        )
        print(response)
    except Exception as exception:
        # Best-effort upload: report the problem and return normally.
        print("Error inserting data into Qdrant:", exception)
def main():
    """Ensure the collection exists, then upload the embeddings from ``./metadata.jsonl``.

    Prints a message and returns without uploading when the file is not found.
    """
    create_collection()

    # Adjust this path if the JSONL file lives somewhere else.
    jsonl_file = "./metadata.jsonl"
    if not os.path.exists(jsonl_file):
        print(f"Archivo {jsonl_file} no encontrado")
        return

    print(f"Subiendo embeddings desde {jsonl_file}...")
    upload_embeddings_from_jsonl(jsonl_file)
    print("¡Embeddings subidos exitosamente!")


if __name__ == "__main__":
    main()