Spaces:
Sleeping
Sleeping
agente con chroma direttamente
Browse files
agent.py
CHANGED
|
@@ -12,6 +12,8 @@ from langchain.tools.retriever import create_retriever_tool
|
|
| 12 |
from langchain_community.tools import DuckDuckGoSearchResults
|
| 13 |
from langchain_community.vectorstores import Chroma
|
| 14 |
import json
|
|
|
|
|
|
|
| 15 |
|
| 16 |
|
| 17 |
|
|
@@ -114,13 +116,35 @@ sys_msg = SystemMessage(content=system_prompt)
|
|
| 114 |
|
| 115 |
|
| 116 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
# Usa gli stessi embeddings
|
| 118 |
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
|
| 119 |
|
| 120 |
-
#
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
)
|
| 125 |
|
| 126 |
# Crea il retriever tool
|
|
|
|
| 12 |
from langchain_community.tools import DuckDuckGoSearchResults
|
| 13 |
from langchain_community.vectorstores import Chroma
|
| 14 |
import json
|
| 15 |
+
import os

# Opt out of Chroma's anonymized telemetry. chromadb reads this switch from its
# Settings field `anonymized_telemetry`, which can be supplied through the
# ANONYMIZED_TELEMETRY environment variable. The previous assignment to
# `chromadb.config.Settings.telemetry_enabled` only created an unrelated class
# attribute, so telemetry stayed on. setdefault keeps any value the deployment
# has already chosen.
os.environ.setdefault("ANONYMIZED_TELEMETRY", "False")

import chromadb
|
| 17 |
|
| 18 |
|
| 19 |
|
|
|
|
| 116 |
|
| 117 |
|
| 118 |
|
| 119 |
+
# Load the question/answer records from the JSON Lines file (one JSON object
# per line). The encoding is explicit so decoding does not depend on the host
# locale.
with open('metadata.jsonl', 'r', encoding='utf-8') as jsonl_file:
    json_list = list(jsonl_file)

# Skip blank lines (e.g. a trailing newline at end of file): json.loads raises
# on an empty string, which would otherwise abort start-up.
json_QA = [json.loads(json_str) for json_str in json_list if json_str.strip()]
|
| 126 |
+
|
| 127 |
# Usa gli stessi embeddings
|
| 128 |
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
|
| 129 |
|
| 130 |
+
# Inizializza Chroma
|
| 131 |
+
from langchain.schema import Document
|
| 132 |
+
from langchain_community.vectorstores import Chroma
|
| 133 |
+
|
| 134 |
+
# Build the list of documents: one Document per Q/A sample, with the sample's
# task_id kept as the document id so the store can be rebuilt idempotently.
docs = []
doc_ids = []
seen_ids = set()
for sample in json_QA:
    print(len(docs))
    # NOTE(review): task_id is assumed to identify a sample uniquely; repeated
    # ids are dropped because Chroma rejects duplicate ids within one batch.
    doc_id = str(sample['task_id'])
    if doc_id in seen_ids:
        continue
    seen_ids.add(doc_id)
    content = f"Question : {sample['Question']}\n\nFinal answer : {sample['Final answer']}"
    metadata = {"source": sample['task_id']}
    docs.append(Document(page_content=content, metadata=metadata))
    doc_ids.append(doc_id)
print('fatto')

# Initialise the Chroma vector store. Passing explicit ids makes repeated
# start-ups overwrite the same entries in ./chroma_db; without ids every run
# appended the whole corpus again under fresh random ids, so retrieval
# returned duplicated hits.
vector_store = Chroma.from_documents(
    documents=docs,
    embedding=embeddings,
    ids=doc_ids,
    persist_directory="./chroma_db",
)
|
| 149 |
|
| 150 |
# Crea il retriever tool
|