Spaces:
Build error
Build error
AI should start with a message providing context and guardrails + the output of references is ugly
#8
by
RCaz
- opened
- agent/test.ipynb +58 -16
- app.py +7 -5
agent/test.ipynb
CHANGED
|
@@ -57,7 +57,7 @@
|
|
| 57 |
},
|
| 58 |
{
|
| 59 |
"cell_type": "code",
|
| 60 |
-
"execution_count":
|
| 61 |
"id": "fbbf5838",
|
| 62 |
"metadata": {},
|
| 63 |
"outputs": [],
|
|
@@ -135,7 +135,7 @@
|
|
| 135 |
},
|
| 136 |
{
|
| 137 |
"cell_type": "code",
|
| 138 |
-
"execution_count":
|
| 139 |
"id": "aba59d80",
|
| 140 |
"metadata": {},
|
| 141 |
"outputs": [
|
|
@@ -143,7 +143,7 @@
|
|
| 143 |
"name": "stderr",
|
| 144 |
"output_type": "stream",
|
| 145 |
"text": [
|
| 146 |
-
"/var/folders/dv/gzhyqctn53s9bh23g7tbvl940000gn/T/
|
| 147 |
" embedding_model = HuggingFaceEmbeddings(\n"
|
| 148 |
]
|
| 149 |
}
|
|
@@ -165,19 +165,7 @@
|
|
| 165 |
"execution_count": null,
|
| 166 |
"id": "da07d2c2",
|
| 167 |
"metadata": {},
|
| 168 |
-
"outputs": [
|
| 169 |
-
{
|
| 170 |
-
"ename": "AttributeError",
|
| 171 |
-
"evalue": "'builtin_function_or_method' object has no attribute 'date'",
|
| 172 |
-
"output_type": "error",
|
| 173 |
-
"traceback": [
|
| 174 |
-
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
|
| 175 |
-
"\u001b[31mAttributeError\u001b[39m Traceback (most recent call last)",
|
| 176 |
-
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[44]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[43mdatetime\u001b[49m\u001b[43m.\u001b[49m\u001b[43mnow\u001b[49m\u001b[43m.\u001b[49m\u001b[43mdate\u001b[49m\n\u001b[32m 2\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mlangchain_community\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mvectorstores\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m FAISS\n\u001b[32m 3\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mlangchain_community\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mvectorstores\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mutils\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m DistanceStrategy\n",
|
| 177 |
-
"\u001b[31mAttributeError\u001b[39m: 'builtin_function_or_method' object has no attribute 'date'"
|
| 178 |
-
]
|
| 179 |
-
}
|
| 180 |
-
],
|
| 181 |
"source": [
|
| 182 |
"from langchain_community.vectorstores import FAISS\n",
|
| 183 |
"from langchain_community.vectorstores.utils import DistanceStrategy\n",
|
|
@@ -221,6 +209,60 @@
|
|
| 221 |
"# Download files from Azure\n",
|
| 222 |
"load_from_azure(\"blobcontaineravatarbot\")"
|
| 223 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
}
|
| 225 |
],
|
| 226 |
"metadata": {
|
|
|
|
| 57 |
},
|
| 58 |
{
|
| 59 |
"cell_type": "code",
|
| 60 |
+
"execution_count": 2,
|
| 61 |
"id": "fbbf5838",
|
| 62 |
"metadata": {},
|
| 63 |
"outputs": [],
|
|
|
|
| 135 |
},
|
| 136 |
{
|
| 137 |
"cell_type": "code",
|
| 138 |
+
"execution_count": 3,
|
| 139 |
"id": "aba59d80",
|
| 140 |
"metadata": {},
|
| 141 |
"outputs": [
|
|
|
|
| 143 |
"name": "stderr",
|
| 144 |
"output_type": "stream",
|
| 145 |
"text": [
|
| 146 |
+
"/var/folders/dv/gzhyqctn53s9bh23g7tbvl940000gn/T/ipykernel_5272/3187483442.py:4: LangChainDeprecationWarning: The class `HuggingFaceEmbeddings` was deprecated in LangChain 0.2.2 and will be removed in 1.0. An updated version of the class exists in the `langchain-huggingface package and should be used instead. To use it run `pip install -U `langchain-huggingface` and import as `from `langchain_huggingface import HuggingFaceEmbeddings``.\n",
|
| 147 |
" embedding_model = HuggingFaceEmbeddings(\n"
|
| 148 |
]
|
| 149 |
}
|
|
|
|
| 165 |
"execution_count": null,
|
| 166 |
"id": "da07d2c2",
|
| 167 |
"metadata": {},
|
| 168 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 169 |
"source": [
|
| 170 |
"from langchain_community.vectorstores import FAISS\n",
|
| 171 |
"from langchain_community.vectorstores.utils import DistanceStrategy\n",
|
|
|
|
| 209 |
"# Download files from Azure\n",
|
| 210 |
"load_from_azure(\"blobcontaineravatarbot\")"
|
| 211 |
]
|
| 212 |
+
},
|
| 213 |
+
{
|
| 214 |
+
"cell_type": "code",
|
| 215 |
+
"execution_count": 11,
|
| 216 |
+
"id": "32d45df8",
|
| 217 |
+
"metadata": {},
|
| 218 |
+
"outputs": [],
|
| 219 |
+
"source": [
|
| 220 |
+
"from langchain_community.vectorstores import FAISS\n",
|
| 221 |
+
"from langchain_community.embeddings import HuggingFaceEmbeddings # deprecated\n",
|
| 222 |
+
"# from langchain_huggingface import HuggingFaceEmbeddings\n",
|
| 223 |
+
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
|
| 224 |
+
"from transformers import AutoTokenizer\n",
|
| 225 |
+
"\n",
|
| 226 |
+
"\n",
|
| 227 |
+
"\n",
|
| 228 |
+
"tokenizer_name = \"intfloat/e5-base-v2\"\n",
|
| 229 |
+
"embedding_model = HuggingFaceEmbeddings(\n",
|
| 230 |
+
" model_name=tokenizer_name,\n",
|
| 231 |
+
" # multi_process=True,\n",
|
| 232 |
+
" model_kwargs={\"device\": \"mps\"}, # use cuda for faster embeddings on nbidia GPUs\n",
|
| 233 |
+
" encode_kwargs={\"normalize_embeddings\": True}, # Set `True` for cosine similarity\n",
|
| 234 |
+
")\n",
|
| 235 |
+
"\n",
|
| 236 |
+
"vs = FAISS.load_local(\"../data/FAISS/512-intfloat-e5-base-v2-2026-01-16\",embedding_model,allow_dangerous_deserialization=True)"
|
| 237 |
+
]
|
| 238 |
+
},
|
| 239 |
+
{
|
| 240 |
+
"cell_type": "code",
|
| 241 |
+
"execution_count": null,
|
| 242 |
+
"id": "4d166ca0",
|
| 243 |
+
"metadata": {},
|
| 244 |
+
"outputs": [
|
| 245 |
+
{
|
| 246 |
+
"data": {
|
| 247 |
+
"text/plain": [
|
| 248 |
+
"[(Document(id='5269b130-6f0c-4887-8214-25493e7345f1', metadata={'producer': 'Microsoft® Office Word 2007', 'creator': 'Microsoft® Office Word 2007', 'creationdate': '2014-01-14T13:26:10+01:00', 'author': 'rcazelles', 'moddate': '2014-01-14T13:26:10+01:00', 'source': '../data/research_paper/ENSCM_2013_CAZELLES.pdf', 'total_pages': 196, 'page': 4, 'page_label': '5', 'start_index': 1723}, page_content='. \\nJe remercie sincèrement toutes les personnes qui m’ont permis de mener à bien ce travail : \\nThomas Cacciaguerra, Jullien Drone, Annie Finiels, Philippe Gonzalez , Mourad Guermache , \\nGéraldine Layrac et Peralta Pradial de l’Institut Charles Gehrardt de Montpellier, Ovidiu Ersen et \\nSimona Moldovan de l’Institut de Physique et Chimie des Matériaux de Strasbourg, Jian Liu et \\nMarkus Antonietti de l’Institut Max Planck des Colloides et Interfaces de Potsdam. \\nJe tiens à remer cier chaleureusement Pierre Agulhon, Charlie Basset, Siham Behar, Mélanie \\nBordeaux, Arnaud Chaix, Eddy Dib, Isabelle Girard, Marie-Noëlle Labour, Antoine Lacarrière, \\nAlexander Sachse, Bilel Said, Thibault Terencio, Christophe Trouillefou, Rémi Veneziano, Julian'),\n",
|
| 249 |
+
" 0.7311910248264899),\n",
|
| 250 |
+
" (Document(id='87524f98-b762-4e56-a2fb-3c1319cc983c', metadata={'producer': 'Aspose.Pdf for .NET 8.8.0', 'creator': 'Aspose Ltd.', 'creationdate': '2014-04-28T09:28:27-04:00', 'moddate': '2014-04-28T09:28:27-04:00', 'spdf': '1112', 'source': '../data/research_paper/liu2014.pdf', 'total_pages': 16, 'page': 15, 'page_label': '16', 'start_index': -1}, page_content='.; Blank, D. H.; ten Elshof, J. E., Small 2011 , 7, 2709-2713. \\n(42) Liu, Y.; Wang, H.; Wang, Y .; Xu, H.; Li, M.; Shen, H., Chemical Communications 2011 , 47 , 3790-3792. \\n(43) Cazelles, R.; Drone, J.; Fajula, F.; Ersen, O .; Moldovan, S.; Galarneau, A., New J. Chem. 2013 , 37 , 3721-3730. \\n(44) Duan, Z.; Sun, R., Chem. Geol. 2003 , 193 , 257-271. \\n(45) Yadav, R. K.; Baeg, J.-O.; Oh, G. H.; Park, N .-J.; Kong, K.-J.; Kim, J.; Hwang, D. W.; Biswas, S. K., J. Am. Chem. Soc. \\n2012 , 134 , 11455–11461. \\n(46) Zhou, Z.; Hartmann, M., Chem. Soc. Rev. 2013 , 42 , 3894-3912. \\n \\n \\n \\n \\n \\nPage 15 of 15 Physical Chemistry Chemical Physics\\nPhysical Chemistry Chemical Physics Accepted Manuscript\\nPublished on 28 April 2014. Downloaded by University of Waterloo on 10/06/2014 15:40:59. \\nView Article Online\\nDOI: 10.1039/C4CP01348D'),\n",
|
| 251 |
+
" 0.707401510139031),\n",
|
| 252 |
+
" (Document(id='d4b16de4-ae6b-43b8-ae1e-c8366d418f95', metadata={'producer': 'Microsoft® Office Word 2007', 'creator': 'Microsoft® Office Word 2007', 'creationdate': '2014-01-14T13:26:10+01:00', 'author': 'rcazelles', 'moddate': '2014-01-14T13:26:10+01:00', 'source': '../data/research_paper/ENSCM_2013_CAZELLES.pdf', 'total_pages': 196, 'page': 0, 'page_label': '1', 'start_index': 0}, page_content=\"Délivré par L’ÉCOLE NATIONALE SUPÉRIEURE DE CHIMIE DE \\nMONTPELLIER \\n \\n \\nPréparée au sein de l’école doctorale Sciences Chimiques \\nEt de l’unité de recherche UMR 5253 \\n \\nSpécialité : Biochimie et Chimie des matériaux \\n \\n \\n \\n \\n \\nPrésentée par Rémi CAZELLES \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\nSoutenue le 13 décembre 2013 devant le jury composé de \\n \\n \\n-Mr Eric MARCEAU, Maître de conférences, UMR 7197 \\nUniversité Pierre et Marie Curie, Paris VI \\nRapporteur \\n-Mme Isabelle CHEVALOT, Professeur, UMR 7274 \\nInstitut National Polytechnique de Loraine, Nancy \\nRapporteur \\n-Mr Joël CHOPINEAU, Professeur, UMR 5253 \\nInstitut Charles Gerhardt de Montpellier \\nExaminateur \\n-Mr Alain WALCARIUS, DR1, UMR 7564 \\nLaboratoire de Chimie Physique et Microbiologie \\npour l’Environnement, Nancy \\nExaminateur \\n-Mr Benjamin ERABLE, CR2, UMR 5503 \\nLaboratoire de Génie Chimique de Toulouse \\nExaminateur \\n-Mme Anne GALARNEAU, DR2, UMR 5253 \\nInstitut Charles Gerhardt de Montpellier \\nDirecteur de thèse \\n \\n \\n[Tapez une citation prise dans le document \\nou la synthèse d'un passage intéressant. Vous \\npouvez placer la zone de texte n'importe où \\ndans le document. Utilisez l'onglet Outils de \\nzone de texte pour modifier la mise en forme \\nde la zone de texte de la citation.] \\nBioconversion du CO2 en méthanol par \\nun système polyenzymatique encapsulé \\ndans des nanocapsules poreuses de silice\"),\n",
|
| 253 |
+
" 0.7028941154250334),\n",
|
| 254 |
+
" (Document(id='6a57d507-fa1d-45da-8140-a0c24a95035f', metadata={'producer': 'Microsoft® Office Word 2007', 'creator': 'Microsoft® Office Word 2007', 'creationdate': '2014-01-14T13:26:10+01:00', 'author': 'rcazelles', 'moddate': '2014-01-14T13:26:10+01:00', 'source': '../data/research_paper/ENSCM_2013_CAZELLES.pdf', 'total_pages': 196, 'page': 2, 'page_label': '3', 'start_index': 0}, page_content=\"Délivré par L’ÉCOLE NATIONALE SUPÉRIEURE DE CHIMIE DE \\nMONTPELLIER \\n \\n \\nPréparée au sein de l’école doctorale Sciences Chimiques \\nEt de l’unité de recherche UMR 5253 \\n \\nSpécialité : Biochimie et Chimie des matériaux \\n \\n \\n \\n \\n \\nPrésentée par Rémi CAZELLES \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\nSoutenue le 13 décembre 2013 devant le jury composé de \\n \\n \\n-Mr Eric MARCEAU, Maître de conférences, UMR 7197 \\nUniversité Pierre et Marie Curie, Paris VI \\nRapporteur \\n-Mme Isabelle CHEVALOT, Professeur, UMR 7274 \\nInstitut National Polytechnique de Loraine, Nancy \\nRapporteur \\n-Mr Joël CHOPINEAU, Professeur, UMR 5253 \\nInstitut Charles Gerhardt de Montpellier \\nExaminateur \\n-Mr Alain WALCARIUS, DR1, UMR 7564 \\nLaboratoire de Chimie Physique et Microbiologie \\npour l’Environnement, Nancy \\nExaminateur \\n-Mr Benjamin ERABLE, CR2, UMR 5503 \\nLaboratoire de Génie Chimique de Toulouse \\nExaminateur \\n-Mme Anne GALARNEAU, DR2, UMR 5253 \\nInstitut Charles Gerhardt de Montpellier \\nDirecteur de thèse \\n \\n \\n \\nBioconversion du CO2 en méthanol par \\nun système polyenzymatique encapsulé \\ndans des nanocapsules poreuses de silice \\n[Tapez une citation prise dans le document \\nou la synthèse d'un passage intéressant. Vous \\npouvez placer la zone de texte n'importe où \\ndans le document. Utilisez l'onglet Outils de \\nzone de texte pour modifier la mise en forme \\nde la zone de texte de la citation.]\"),\n",
|
| 255 |
+
" 0.6995823846564415)]"
|
| 256 |
+
]
|
| 257 |
+
},
|
| 258 |
+
"execution_count": 12,
|
| 259 |
+
"metadata": {},
|
| 260 |
+
"output_type": "execute_result"
|
| 261 |
+
}
|
| 262 |
+
],
|
| 263 |
+
"source": [
|
| 264 |
+
"docs = vs._similarity_search_with_relevance_scores(\"remi cazelles research work\")"
|
| 265 |
+
]
|
| 266 |
}
|
| 267 |
],
|
| 268 |
"metadata": {
|
app.py
CHANGED
|
@@ -138,16 +138,17 @@ def predict(message, history, request: gr.Request):
|
|
| 138 |
|
| 139 |
|
| 140 |
# RAG tool
|
| 141 |
-
RAG_PROMPT_TEMPLATE="""
|
|
|
|
| 142 |
give a comprehensive answer to the question.
|
| 143 |
-
Respond
|
| 144 |
-
|
| 145 |
-
If the answer cannot be deduced from the context, do not give an answer.
|
| 146 |
"""
|
| 147 |
|
| 148 |
|
| 149 |
# Create the prompt with system message, context, and conversation history
|
| 150 |
messages = [SystemMessage(content=RAG_PROMPT_TEMPLATE)]
|
|
|
|
| 151 |
messages.extend(history_langchain_format)
|
| 152 |
combined_message = f"Context: {context}\n\nQuestion: {message}"
|
| 153 |
messages.append(HumanMessage(content=combined_message))
|
|
@@ -165,8 +166,9 @@ def predict(message, history, request: gr.Request):
|
|
| 165 |
}
|
| 166 |
)
|
| 167 |
|
|
|
|
| 168 |
source_context = "\nSources:\n" + "\n".join([
|
| 169 |
-
f"{doc.metadata["source"].split("/")[-1]} ({doc.metadata.
|
| 170 |
for i, doc in enumerate(relevant_docs)])
|
| 171 |
|
| 172 |
print(gpt_response.content )
|
|
|
|
| 138 |
|
| 139 |
|
| 140 |
# RAG tool
|
| 141 |
+
RAG_PROMPT_TEMPLATE="""You will be asked information about Rémi Cazelles's projects, work and education.
|
| 142 |
+
Using the information contained in the context,
|
| 143 |
give a comprehensive answer to the question.
|
| 144 |
+
Respond to the question asked with enought details, response should be precise and relevant to the question.
|
| 145 |
+
If the answer cannot be deduced from the context, simply says you can't find information.
|
|
|
|
| 146 |
"""
|
| 147 |
|
| 148 |
|
| 149 |
# Create the prompt with system message, context, and conversation history
|
| 150 |
messages = [SystemMessage(content=RAG_PROMPT_TEMPLATE)]
|
| 151 |
+
messages.extend[AIMessage(content="This bot allows you finding informations related to Rémi Cazelles's projects, work and education")]
|
| 152 |
messages.extend(history_langchain_format)
|
| 153 |
combined_message = f"Context: {context}\n\nQuestion: {message}"
|
| 154 |
messages.append(HumanMessage(content=combined_message))
|
|
|
|
| 166 |
}
|
| 167 |
)
|
| 168 |
|
| 169 |
+
|
| 170 |
source_context = "\nSources:\n" + "\n".join([
|
| 171 |
+
f"{i+1} : {doc.metadata["source"].split("/")[-1]} (page {doc.metadata['page_label']}/{doc.metadata['total_pages']})\n---"
|
| 172 |
for i, doc in enumerate(relevant_docs)])
|
| 173 |
|
| 174 |
print(gpt_response.content )
|