Spaces:
Running
Running
Commit
·
438d4f9
1
Parent(s):
236cc74
Dynamic Search Parameters Added
Browse files- __pycache__/config.cpython-310.pyc +0 -0
- __pycache__/graph_agentB.cpython-310.pyc +0 -0
- __pycache__/pinecone_utilsB.cpython-310.pyc +0 -0
- app.py +46 -14
- config.py +18 -2
- emissions.csv.bak +18 -0
- graph_agentB.py +14 -1
- graph_agentC.py +3 -0
- index_documents.py +0 -1
- neo4j_utils.py +6 -10
- pinecone_utilsB.py +33 -30
__pycache__/config.cpython-310.pyc
CHANGED
|
Binary files a/__pycache__/config.cpython-310.pyc and b/__pycache__/config.cpython-310.pyc differ
|
|
|
__pycache__/graph_agentB.cpython-310.pyc
CHANGED
|
Binary files a/__pycache__/graph_agentB.cpython-310.pyc and b/__pycache__/graph_agentB.cpython-310.pyc differ
|
|
|
__pycache__/pinecone_utilsB.cpython-310.pyc
CHANGED
|
Binary files a/__pycache__/pinecone_utilsB.cpython-310.pyc and b/__pycache__/pinecone_utilsB.cpython-310.pyc differ
|
|
|
app.py
CHANGED
|
@@ -33,24 +33,41 @@ def check_indexes_ready():
|
|
| 33 |
return False
|
| 34 |
|
| 35 |
def process_query(query, architecture: Literal["A", "B", "C"]):
|
| 36 |
-
"""Traite la requête de l'utilisateur avec l'architecture A."""
|
| 37 |
|
| 38 |
# Reload conversation
|
| 39 |
display_chat_history()
|
| 40 |
|
| 41 |
-
config = {
|
| 42 |
|
| 43 |
if architecture == "A":
|
| 44 |
agent = agent_A
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
|
|
|
|
|
|
| 49 |
|
| 50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
-
|
| 53 |
-
initial_state = {"query": query, "messages": [], "relevant_docs": [], "response": ""}
|
| 54 |
|
| 55 |
with st.chat_message("user"):
|
| 56 |
st.markdown(query)
|
|
@@ -60,21 +77,19 @@ def process_query(query, architecture: Literal["A", "B", "C"]):
|
|
| 60 |
start_time = time.time() # Start timing
|
| 61 |
|
| 62 |
events = agent.stream(initial_state, config=config, stream_mode="messages")
|
| 63 |
-
|
| 64 |
# Ajouter le message du chatbot avec streaming
|
| 65 |
with st.chat_message("assistant"):
|
| 66 |
response_placeholder = st.empty()
|
| 67 |
|
| 68 |
-
|
| 69 |
for event in events:
|
| 70 |
for message in event:
|
| 71 |
if isinstance(message, AIMessageChunk):
|
| 72 |
-
# Vérifiez le type de message et affichez son contenu
|
| 73 |
if hasattr(message, 'content'):
|
| 74 |
full_response += message.content
|
| 75 |
|
| 76 |
-
|
| 77 |
-
|
| 78 |
|
| 79 |
response_placeholder.markdown(full_response)
|
| 80 |
|
|
@@ -131,6 +146,23 @@ def display_sidebar():
|
|
| 131 |
|
| 132 |
# Token metrics containers
|
| 133 |
# st.sidebar.markdown("### Tokens")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
st.sidebar.markdown("### API Mistral AI")
|
| 135 |
|
| 136 |
# HTML pour le tableau sans index ni colonnes inutiles
|
|
|
|
| 33 |
return False
|
| 34 |
|
| 35 |
def process_query(query, architecture: Literal["A", "B", "C"]):
|
| 36 |
+
"""Traite la requête de l'utilisateur avec l'architecture A, B ou C."""
|
| 37 |
|
| 38 |
# Reload conversation
|
| 39 |
display_chat_history()
|
| 40 |
|
| 41 |
+
config = {"metadata": {"architecture": architecture}, "tags": ["arch_" + architecture]}
|
| 42 |
|
| 43 |
if architecture == "A":
|
| 44 |
agent = agent_A
|
| 45 |
+
initial_state = {
|
| 46 |
+
"query": query,
|
| 47 |
+
"messages": [],
|
| 48 |
+
"relevant_docs": [],
|
| 49 |
+
"response": "",
|
| 50 |
+
}
|
| 51 |
|
| 52 |
+
elif architecture in ["B", "C"]:
|
| 53 |
+
agent = agent_B if architecture == "B" else agent_C
|
| 54 |
+
|
| 55 |
+
# Récupération des paramètres dynamiques uniquement pour B et C
|
| 56 |
+
k = st.session_state.get("k", 30) # Nombre de documents
|
| 57 |
+
alpha = st.session_state.get("alpha", 0.5) # Pondération hybride
|
| 58 |
+
similarity_threshold = st.session_state.get("similarity_threshold", 0.7) # similarité cosinus
|
| 59 |
+
|
| 60 |
+
initial_state = {
|
| 61 |
+
"query": query,
|
| 62 |
+
"messages": [],
|
| 63 |
+
"relevant_docs": [],
|
| 64 |
+
"response": "",
|
| 65 |
+
"k": k,
|
| 66 |
+
"alpha": alpha,
|
| 67 |
+
"similarity_threshold": similarity_threshold,
|
| 68 |
+
}
|
| 69 |
|
| 70 |
+
st.session_state.chat_history.append({"role": "user", "content": query})
|
|
|
|
| 71 |
|
| 72 |
with st.chat_message("user"):
|
| 73 |
st.markdown(query)
|
|
|
|
| 77 |
start_time = time.time() # Start timing
|
| 78 |
|
| 79 |
events = agent.stream(initial_state, config=config, stream_mode="messages")
|
| 80 |
+
|
| 81 |
# Ajouter le message du chatbot avec streaming
|
| 82 |
with st.chat_message("assistant"):
|
| 83 |
response_placeholder = st.empty()
|
| 84 |
|
|
|
|
| 85 |
for event in events:
|
| 86 |
for message in event:
|
| 87 |
if isinstance(message, AIMessageChunk):
|
|
|
|
| 88 |
if hasattr(message, 'content'):
|
| 89 |
full_response += message.content
|
| 90 |
|
| 91 |
+
# Supprimer les requêtes Cypher avant l'affichage
|
| 92 |
+
full_response = re.sub(r"(?i)(MATCH|CREATE|MERGE|DELETE|CALL)[\s\S]+?;", "", full_response).strip()
|
| 93 |
|
| 94 |
response_placeholder.markdown(full_response)
|
| 95 |
|
|
|
|
| 146 |
|
| 147 |
# Token metrics containers
|
| 148 |
# st.sidebar.markdown("### Tokens")
|
| 149 |
+
|
| 150 |
+
st.markdown("### Paramètres de la recherche hybride")
|
| 151 |
+
|
| 152 |
+
# Sélection du nombre de documents (k)
|
| 153 |
+
k = st.number_input("Nombre de documents à récupérer (k)", min_value=1, max_value=100, value=30, step=1)
|
| 154 |
+
|
| 155 |
+
# Sélection du score de similarité cosinus(entre la requête et les documents pertinents)
|
| 156 |
+
similarity_threshold = st.slider("Score de similarité cosinus entre la requête et les documents pertinents", 0.0, 1.0, 0.7, 0.05)
|
| 157 |
+
|
| 158 |
+
# Sélection du coefficient de pondération alpha
|
| 159 |
+
alpha = st.slider("Équilibre entre recherche sémantique et syntaxique (pour alpha = 0.0, la recherche est purement syntaxique)", 0.0, 1.0, 0.5, 0.05)
|
| 160 |
+
|
| 161 |
+
# Sauvegarde des valeurs dans la session
|
| 162 |
+
st.session_state['k'] = k
|
| 163 |
+
st.session_state['alpha'] = alpha
|
| 164 |
+
st.session_state['similarity_threshold'] = similarity_threshold  # must be the same key the st.session_state.get('similarity_threshold', 0.7) readers use
|
| 165 |
+
|
| 166 |
st.sidebar.markdown("### API Mistral AI")
|
| 167 |
|
| 168 |
# HTML pour le tableau sans index ni colonnes inutiles
|
config.py
CHANGED
|
@@ -20,6 +20,7 @@ pc = Pinecone(api_key=pinecone_api_key)
|
|
| 20 |
# Configuration des index
|
| 21 |
sparse_index_name = "confession-muette-sparse"
|
| 22 |
dense_index_name = "confession-muette-dense"
|
|
|
|
| 23 |
|
| 24 |
# LangSmith configuration
|
| 25 |
langsmith_endpoint = os.getenv("LANGSMITH_ENDPOINT")
|
|
@@ -82,10 +83,25 @@ if dense_index_name not in all_names:
|
|
| 82 |
except Exception as e:
|
| 83 |
print("Erreur lors de la création de l'index dense:", e)
|
| 84 |
|
| 85 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
|
| 87 |
# Connexion aux index
|
| 88 |
sparse_index = pc.Index(sparse_index_name)
|
| 89 |
dense_index = pc.Index(dense_index_name)
|
| 90 |
-
|
|
|
|
| 91 |
|
|
|
|
| 20 |
# Configuration des index
|
| 21 |
sparse_index_name = "confession-muette-sparse"
|
| 22 |
dense_index_name = "confession-muette-dense"
|
| 23 |
+
judge_index_name = "confession-muette-judge"
|
| 24 |
|
| 25 |
# LangSmith configuration
|
| 26 |
langsmith_endpoint = os.getenv("LANGSMITH_ENDPOINT")
|
|
|
|
| 83 |
except Exception as e:
|
| 84 |
print("Erreur lors de la création de l'index dense:", e)
|
| 85 |
|
| 86 |
+
# Create the judge index if it does not exist yet.
if judge_index_name not in all_names:
    print(f"L'index '{judge_index_name}' n'existe pas. Création en cours...")
    try:
        pc.create_index(
            name=judge_index_name,
            metric="dotproduct",
            dimension=1024,
            spec=ServerlessSpec(cloud="aws", region="us-east-1"),
        )
        print(f"Index '{judge_index_name}' créé avec succès.")
    except Exception as e:
        # Best-effort: report the failure and keep going. The message names
        # the judge index (it previously said "sparse", a copy-paste slip
        # that pointed at the wrong index when debugging).
        print("Erreur lors de la création de l'index judge:", e)
|
| 99 |
+
|
| 100 |
+
print(f"Les index '{sparse_index_name}' , '{dense_index_name}' et '{judge_index_name}' sont prêts.")
|
| 101 |
|
| 102 |
# Connexion aux index
|
| 103 |
sparse_index = pc.Index(sparse_index_name)
|
| 104 |
dense_index = pc.Index(dense_index_name)
|
| 105 |
+
judge_index = pc.Index(judge_index_name)
|
| 106 |
+
print(f"Connexion aux index '{sparse_index_name}' , '{dense_index_name}' et '{judge_index_name}' réussie.")
|
| 107 |
|
emissions.csv.bak
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
timestamp,project_name,run_id,experiment_id,duration,emissions,emissions_rate,cpu_power,gpu_power,ram_power,cpu_energy,gpu_energy,ram_energy,energy_consumed,country_name,country_iso_code,region,cloud_provider,cloud_region,os,python_version,codecarbon_version,cpu_count,cpu_model,gpu_count,gpu_model,longitude,latitude,ram_total_size,tracking_mode,on_cloud,pue
|
| 2 |
+
2025-03-11T09:09:17,codecarbon,6746b1d6-a00b-4535-85f0-5d07699044fc,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,0.343940399994608,1.0596201036586613e-07,3.080824769859177e-07,14.0,0.0,5.9043402671813965,1.3308975555305047e-06,0,5.599643507466613e-07,1.8908619062771664e-06,France,FRA,nouvelle-aquitaine,,,Windows-10-10.0.19045-SP0,3.10.16,2.8.3,8,11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz,,,-0.5286,44.8567,15.74490737915039,machine,N,1.0
|
| 3 |
+
2025-03-11T09:09:28,codecarbon,6746b1d6-a00b-4535-85f0-5d07699044fc,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,11.03492499999993,3.41777160839172e-06,3.0972313888782584e-07,14.0,0.0,5.9043402671813965,4.289979011106576e-05,0,1.8089371158616417e-05,6.098916126968218e-05,France,FRA,nouvelle-aquitaine,,,Windows-10-10.0.19045-SP0,3.10.16,2.8.3,8,11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz,,,-0.5286,44.8567,15.74490737915039,machine,N,1.0
|
| 4 |
+
2025-03-11T09:10:03,codecarbon,6746b1d6-a00b-4535-85f0-5d07699044fc,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,46.45240809999814,1.439103128818192e-05,3.098016201270413e-07,14.0,0.0,5.9043402671813965,0.000180629357611,0,7.617449842101004e-05,0.000256803856032,France,FRA,nouvelle-aquitaine,,,Windows-10-10.0.19045-SP0,3.10.16,2.8.3,8,11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz,,,-0.5286,44.8567,15.74490737915039,machine,N,1.0
|
| 5 |
+
2025-03-11T09:10:14,codecarbon,6746b1d6-a00b-4535-85f0-5d07699044fc,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,57.77152389999537,1.7897503602431466e-05,3.097980180237707e-07,14.0,0.0,5.9043402671813965,0.0002246418070555,0,9.473404908808602e-05,0.0003193758561436,France,FRA,nouvelle-aquitaine,,,Windows-10-10.0.19045-SP0,3.10.16,2.8.3,8,11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz,,,-0.5286,44.8567,15.74490737915039,machine,N,1.0
|
| 6 |
+
2025-03-11T09:10:40,codecarbon,6746b1d6-a00b-4535-85f0-5d07699044fc,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,83.21980689999327,2.57817734186566e-05,3.0980333143092986e-07,14.0,0.0,5.9043402671813965,0.0003236005359999,0,0.0001364678702645,0.0004600684062645,France,FRA,nouvelle-aquitaine,,,Windows-10-10.0.19045-SP0,3.10.16,2.8.3,8,11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz,,,-0.5286,44.8567,15.74490737915039,machine,N,1.0
|
| 7 |
+
2025-03-11T09:23:31,codecarbon,c6623f43-59ad-43c4-86be-5acc04841060,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,0.1962917000055313,6.015137318312737e-08,3.064386990455141e-07,14.0,0.0,5.9043402671813965,7.561085000068285e-07,0,3.172756286023072e-07,1.0733841286091357e-06,France,FRA,nouvelle-aquitaine,,,Windows-10-10.0.19045-SP0,3.10.16,2.8.3,8,11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz,,,-0.5286,44.8567,15.74490737915039,machine,N,1.0
|
| 8 |
+
2025-03-11T09:24:00,codecarbon,c6623f43-59ad-43c4-86be-5acc04841060,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,29.127897900005337,9.023727829842991e-06,3.097967405962838e-07,14.0,0.0,5.9043402671813965,0.0001132624655555,0,4.776338840049357e-05,0.000161025853956,France,FRA,nouvelle-aquitaine,,,Windows-10-10.0.19045-SP0,3.10.16,2.8.3,8,11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz,,,-0.5286,44.8567,15.74490737915039,machine,N,1.0
|
| 9 |
+
2025-03-11T09:28:00,codecarbon,c6623f43-59ad-43c4-86be-5acc04841060,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,269.73810180000146,8.357370895910917e-05,3.0983279114596604e-07,14.0,0.0,5.9043402671813965,0.0010489653354444,0,0.0004423837064568,0.0014913490419013,France,FRA,nouvelle-aquitaine,,,Windows-10-10.0.19045-SP0,3.10.16,2.8.3,8,11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz,,,-0.5286,44.8567,15.74490737915039,machine,N,1.0
|
| 10 |
+
2025-03-11T09:45:29,codecarbon,c6623f43-59ad-43c4-86be-5acc04841060,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,1318.7430936000092,0.0004085957645354,3.0983727347535334e-07,14.0,0.0,5.9043402671813965,0.005128426107,0,0.0021628489788416,0.0072912750858416,France,FRA,nouvelle-aquitaine,,,Windows-10-10.0.19045-SP0,3.10.16,2.8.3,8,11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz,,,-0.5286,44.8567,15.74490737915039,machine,N,1.0
|
| 11 |
+
2025-03-11T09:45:33,codecarbon,c6623f43-59ad-43c4-86be-5acc04841060,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,1322.6812570999973,0.0004098154171413,3.098368673038492e-07,14.0,0.0,5.9043402671813965,0.0051437358322777,0,0.0021693036070659,0.0073130394393436,France,FRA,nouvelle-aquitaine,,,Windows-10-10.0.19045-SP0,3.10.16,2.8.3,8,11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz,,,-0.5286,44.8567,15.74490737915039,machine,N,1.0
|
| 12 |
+
2025-03-11T10:08:55,codecarbon,0069d3eb-d74a-4e2d-8bd0-0e3492cf6afe,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,0.2247720000013942,6.921696286853276e-08,3.079429949820415e-07,14.0,0.0,5.9043402671813965,8.697468888648372e-07,0,3.654101065942672e-07,1.2351569954591044e-06,France,FRA,nouvelle-aquitaine,,,Windows-10-10.0.19045-SP0,3.10.16,2.8.3,8,11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz,,,-0.5286,44.8567,15.74490737915039,machine,N,1.0
|
| 13 |
+
2025-03-11T10:11:10,codecarbon,0069d3eb-d74a-4e2d-8bd0-0e3492cf6afe,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,135.18873959999473,4.1885770293869065e-05,3.0983179825334133e-07,14.0,0.0,5.9043402671813965,0.0005257239851111,0,0.0002217156603834,0.0007474396454945,France,FRA,nouvelle-aquitaine,,,Windows-10-10.0.19045-SP0,3.10.16,2.8.3,8,11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz,,,-0.5286,44.8567,15.74490737915039,machine,N,1.0
|
| 14 |
+
2025-03-11T10:13:22,codecarbon,0069d3eb-d74a-4e2d-8bd0-0e3492cf6afe,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,267.0583891999995,8.2743590012402e-05,3.098333299330862e-07,14.0,0.0,5.9043402671813965,0.0010385451210555,0,0.0004379906845869,0.0014765358056425,France,FRA,nouvelle-aquitaine,,,Windows-10-10.0.19045-SP0,3.10.16,2.8.3,8,11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz,,,-0.5286,44.8567,15.74490737915039,machine,N,1.0
|
| 15 |
+
2025-03-11T10:13:27,codecarbon,0069d3eb-d74a-4e2d-8bd0-0e3492cf6afe,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,272.09710749999795,8.430420405907759e-05,3.098313129222744e-07,14.0,0.0,5.9043402671813965,0.0010581339039443,0,0.0004462506150348,0.0015043845189792,France,FRA,nouvelle-aquitaine,,,Windows-10-10.0.19045-SP0,3.10.16,2.8.3,8,11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz,,,-0.5286,44.8567,15.74490737915039,machine,N,1.0
|
| 16 |
+
2025-03-11T10:13:35,codecarbon,0069d3eb-d74a-4e2d-8bd0-0e3492cf6afe,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,279.8871901999956,8.671737661900644e-05,3.0982974446612534e-07,14.0,0.0,5.9043402671813965,0.0010884237664999,0,0.0004590231297509,0.0015474468962509,France,FRA,nouvelle-aquitaine,,,Windows-10-10.0.19045-SP0,3.10.16,2.8.3,8,11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz,,,-0.5286,44.8567,15.74490737915039,machine,N,1.0
|
| 17 |
+
2025-03-11T10:13:39,codecarbon,0069d3eb-d74a-4e2d-8bd0-0e3492cf6afe,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,284.0483774000022,8.800610287334622e-05,3.0982786692498645e-07,14.0,0.0,5.9043402671813965,0.0011045999704444,0,0.0004658438788988,0.0015704438493432,France,FRA,nouvelle-aquitaine,,,Windows-10-10.0.19045-SP0,3.10.16,2.8.3,8,11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz,,,-0.5286,44.8567,15.74490737915039,machine,N,1.0
|
| 18 |
+
2025-03-11T10:13:49,codecarbon,0069d3eb-d74a-4e2d-8bd0-0e3492cf6afe,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,293.94846909999615,9.107300579774026e-05,3.0982643344456e-07,14.0,0.0,5.9043402671813965,0.001143094986833312,0,0.0004820768720103549,0.0016251718588436669,France,FRA,nouvelle-aquitaine,,,Windows-10-10.0.19045-SP0,3.10.16,2.8.3,8,11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz,,,-0.5286,44.8567,15.74490737915039,machine,N,1.0
|
graph_agentB.py
CHANGED
|
@@ -11,11 +11,19 @@ class GraphState(TypedDict):
|
|
| 11 |
query: str
|
| 12 |
relevant_docs: list
|
| 13 |
response: str
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
|
| 16 |
def retrieve_combined(state: GraphState) -> dict:
|
| 17 |
"""Récupération hybride : Pinecone (sémantique) + BM25 (mots-clés)."""
|
| 18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
return {"relevant_docs": relevant_docs}
|
| 20 |
|
| 21 |
def generate_response(state: GraphState) -> dict:
|
|
@@ -30,6 +38,11 @@ def generate_response(state: GraphState) -> dict:
|
|
| 30 |
- Utilisez les mots-clés pertinents de manière naturelle dans votre réponse.
|
| 31 |
- Expliquez les concepts en vous appuyant sur le contexte sémantique.
|
| 32 |
- Ne mentionnez pas explicitement les termes "recherche sémantique" ou "recherche par mots-clés".
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
**Informations pertinentes trouvées** :
|
| 35 |
{context}
|
|
|
|
| 11 |
query: str
|
| 12 |
relevant_docs: list
|
| 13 |
response: str
|
| 14 |
+
k: int
|
| 15 |
+
alpha: float
|
| 16 |
+
similarity_threshold: float
|
| 17 |
|
| 18 |
|
| 19 |
def retrieve_combined(state: GraphState) -> dict:
    """Hybrid retrieval: Pinecone (semantic) + BM25 (keywords).

    The search parameters ``k``, ``alpha`` and ``similarity_threshold`` are
    read from the graph state when the caller supplied them, and otherwise
    from the Streamlit session, with the defaults 30 / 0.5 / 0.7.

    Args:
        state: Graph state; ``state["query"]`` is the user query.

    Returns:
        A partial state update ``{"relevant_docs": [...]}`` holding whatever
        ``hybrid_search`` returned.
    """

    def _param(name, default):
        # Prefer the value carried by the graph state so the node does not
        # depend on the Streamlit session; only fall back to the session
        # (the previous behaviour) when the state does not provide it.
        value = state.get(name)
        if value is None:
            value = st.session_state.get(name, default)
        return value

    relevant_docs = hybrid_search(
        state["query"],
        alpha=_param("alpha", 0.5),
        k=_param("k", 30),
        similarity_threshold=_param("similarity_threshold", 0.7),
    )
    return {"relevant_docs": relevant_docs}
|
| 28 |
|
| 29 |
def generate_response(state: GraphState) -> dict:
|
|
|
|
| 38 |
- Utilisez les mots-clés pertinents de manière naturelle dans votre réponse.
|
| 39 |
- Expliquez les concepts en vous appuyant sur le contexte sémantique.
|
| 40 |
- Ne mentionnez pas explicitement les termes "recherche sémantique" ou "recherche par mots-clés".
|
| 41 |
+
- **Si la réponse doit être une liste retournez chaque élément sur **une nouvelle ligne**.
|
| 42 |
+
- Format correct attendu :
|
| 43 |
+
1. Élément 1
|
| 44 |
+
2. Élément 2
|
| 45 |
+
3. Élément 3
|
| 46 |
|
| 47 |
**Informations pertinentes trouvées** :
|
| 48 |
{context}
|
graph_agentC.py
CHANGED
|
@@ -12,6 +12,9 @@ class GraphState(TypedDict):
|
|
| 12 |
relevant_docs: list # Résultats de la recherche hybride
|
| 13 |
neo4j_results: list # Résultats de la recherche Neo4j
|
| 14 |
response: str
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
|
| 17 |
def retrieve_unified(state: GraphState) -> dict:
|
|
|
|
| 12 |
relevant_docs: list # Résultats de la recherche hybride
|
| 13 |
neo4j_results: list # Résultats de la recherche Neo4j
|
| 14 |
response: str
|
| 15 |
+
k: int
|
| 16 |
+
alpha: float
|
| 17 |
+
similarity_threshold: float
|
| 18 |
|
| 19 |
|
| 20 |
def retrieve_unified(state: GraphState) -> dict:
|
index_documents.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
| 1 |
-
# index_documents.py
|
| 2 |
from pinecone_utilsA import index_pdf as index_pdf_A
|
| 3 |
from pinecone_utilsB import *
|
| 4 |
from pdf_processing import get_existing_pdf, load_and_preprocess_pdf, split_text
|
|
|
|
|
|
|
| 1 |
from pinecone_utilsA import index_pdf as index_pdf_A
|
| 2 |
from pinecone_utilsB import *
|
| 3 |
from pdf_processing import get_existing_pdf, load_and_preprocess_pdf, split_text
|
neo4j_utils.py
CHANGED
|
@@ -57,35 +57,27 @@ def generate_cypher_query(user_query):
|
|
| 57 |
prompt = f"""
|
| 58 |
Vous êtes un générateur de requêtes Cypher pour une base de données Neo4j.
|
| 59 |
Étant donné une demande de l'utilisateur, générez une requête Cypher correspondante.
|
| 60 |
-
|
| 61 |
**Instructions supplémentaires** :
|
| 62 |
- Utilisez `DISTINCT` pour éviter les répétitions.
|
| 63 |
- Optimisez la requête pour éviter les chemins redondants.
|
| 64 |
- Tous les nœuds ont uniquement la propriété `id`. Utilisez `id` pour filtrer ou retourner des valeurs.
|
| 65 |
- Utilisez uniquement les nœuds et relations autorisés suivants :
|
| 66 |
-
|
| 67 |
**Nœuds autorisés** :
|
| 68 |
{", ".join(allowed_nodes)}
|
| 69 |
-
|
| 70 |
**Relations autorisées** :
|
| 71 |
{", ".join(allowed_relationships)}
|
| 72 |
-
|
| 73 |
**Exemples de requêtes** :
|
| 74 |
- Demande : "Trouver tous les lieux mentionnés dans une histoire."
|
| 75 |
Requête : MATCH (n:Personnage) RETURN n.id;
|
| 76 |
-
|
| 77 |
- Demande : "Lister tous les personnages qui connaissent Zéphyrine."
|
| 78 |
Requête : MATCH (p1:Personnage)-[:CONNAÎT]->(p2:Personnage {{name: "Zéphyrine"}}) RETURN DISTINCT p1.name
|
| 79 |
-
|
| 80 |
**Format de la réponse** :
|
| 81 |
- Ne fournissez **que la requête Cypher**, sans explications ni commentaires.
|
| 82 |
- La requête doit commencer par `MATCH`, `CREATE`, `MERGE`, `RETURN`, etc.
|
| 83 |
- La requête doit se terminer par un point-virgule (`;`).
|
| 84 |
- Si vous ne pouvez pas générer de requête, retournez une chaîne vide.
|
| 85 |
-
|
| 86 |
**Demande de l'utilisateur** :
|
| 87 |
{user_query}
|
| 88 |
-
|
| 89 |
**Requête Cypher** :
|
| 90 |
"""
|
| 91 |
|
|
@@ -145,7 +137,11 @@ def merge_results(hybrid_results, neo4j_results):
|
|
| 145 |
def unified_search(query):
|
| 146 |
"""Perform a unified search combining hybrid search and Neo4j knowledge graph search."""
|
| 147 |
# Step 1: Perform hybrid search
|
| 148 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
#print(f"résultat recherche hybride: {hybrid_results}")
|
| 150 |
|
| 151 |
# Step 2: Generate and execute Cypher query
|
|
@@ -160,4 +156,4 @@ def unified_search(query):
|
|
| 160 |
# Step 3: Merge results
|
| 161 |
final_results = merge_results(hybrid_results, neo4j_results)
|
| 162 |
|
| 163 |
-
return final_results
|
|
|
|
| 57 |
prompt = f"""
|
| 58 |
Vous êtes un générateur de requêtes Cypher pour une base de données Neo4j.
|
| 59 |
Étant donné une demande de l'utilisateur, générez une requête Cypher correspondante.
|
|
|
|
| 60 |
**Instructions supplémentaires** :
|
| 61 |
- Utilisez `DISTINCT` pour éviter les répétitions.
|
| 62 |
- Optimisez la requête pour éviter les chemins redondants.
|
| 63 |
- Tous les nœuds ont uniquement la propriété `id`. Utilisez `id` pour filtrer ou retourner des valeurs.
|
| 64 |
- Utilisez uniquement les nœuds et relations autorisés suivants :
|
|
|
|
| 65 |
**Nœuds autorisés** :
|
| 66 |
{", ".join(allowed_nodes)}
|
|
|
|
| 67 |
**Relations autorisées** :
|
| 68 |
{", ".join(allowed_relationships)}
|
|
|
|
| 69 |
**Exemples de requêtes** :
|
| 70 |
- Demande : "Trouver tous les lieux mentionnés dans une histoire."
|
| 71 |
Requête : MATCH (n:Personnage) RETURN n.id;
|
|
|
|
| 72 |
- Demande : "Lister tous les personnages qui connaissent Zéphyrine."
|
| 73 |
Requête : MATCH (p1:Personnage)-[:CONNAÎT]->(p2:Personnage {{name: "Zéphyrine"}}) RETURN DISTINCT p1.name
|
|
|
|
| 74 |
**Format de la réponse** :
|
| 75 |
- Ne fournissez **que la requête Cypher**, sans explications ni commentaires.
|
| 76 |
- La requête doit commencer par `MATCH`, `CREATE`, `MERGE`, `RETURN`, etc.
|
| 77 |
- La requête doit se terminer par un point-virgule (`;`).
|
| 78 |
- Si vous ne pouvez pas générer de requête, retournez une chaîne vide.
|
|
|
|
| 79 |
**Demande de l'utilisateur** :
|
| 80 |
{user_query}
|
|
|
|
| 81 |
**Requête Cypher** :
|
| 82 |
"""
|
| 83 |
|
|
|
|
| 137 |
def unified_search(query):
|
| 138 |
"""Perform a unified search combining hybrid search and Neo4j knowledge graph search."""
|
| 139 |
# Step 1: Perform hybrid search
|
| 140 |
+
k = st.session_state.get("k", 30) # Valeur par défaut 30
|
| 141 |
+
alpha = st.session_state.get("alpha", 0.5) # Valeur par défaut 0.5
|
| 142 |
+
similarity_threshold = st.session_state.get('similarity_threshold', 0.7) # Valeur par défaut 0.7
|
| 143 |
+
|
| 144 |
+
hybrid_results = hybrid_search(query, alpha=alpha, k=k, similarity_threshold=similarity_threshold)
|
| 145 |
#print(f"résultat recherche hybride: {hybrid_results}")
|
| 146 |
|
| 147 |
# Step 2: Generate and execute Cypher query
|
|
|
|
| 156 |
# Step 3: Merge results
|
| 157 |
final_results = merge_results(hybrid_results, neo4j_results)
|
| 158 |
|
| 159 |
+
return final_results
|
pinecone_utilsB.py
CHANGED
|
@@ -113,44 +113,47 @@ def index_pdf_B(texts):
|
|
| 113 |
st.session_state.indexing_done = True # Marquer l'indexation comme terminée
|
| 114 |
st.success("Indexation terminée sans duplication de contenu.")
|
| 115 |
|
| 116 |
-
def hybrid_search(query):
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
|
|
|
|
|
|
| 138 |
compressed_text = match["metadata"]["compressed_text"]
|
| 139 |
sparse_values_json = match["metadata"].get("sparse_values")
|
| 140 |
|
| 141 |
-
# Désérialiser les valeurs sparse
|
| 142 |
if sparse_values_json:
|
| 143 |
sparse_values = json.loads(sparse_values_json)
|
| 144 |
|
| 145 |
relevant_docs.append(decompress_text(compressed_text))
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
|
| 151 |
-
except Exception as e:
|
| 152 |
-
st.error(f"Erreur lors de la recherche hybride : {e}")
|
| 153 |
-
return []
|
| 154 |
|
| 155 |
def compress_text(text):
|
| 156 |
"""Compresse un texte en base64."""
|
|
|
|
| 113 |
st.session_state.indexing_done = True # Marquer l'indexation comme terminée
|
| 114 |
st.success("Indexation terminée sans duplication de contenu.")
|
| 115 |
|
| 116 |
+
def _scale_hybrid_query(dense, sparse, alpha):
    """Weight a dense/sparse query pair by the convex combination ``alpha``.

    ``alpha == 1`` keeps only the dense (semantic) part, ``alpha == 0`` only
    the sparse (keyword) part.

    Args:
        dense: Dense query vector (list of floats).
        sparse: Sparse query as a mapping with ``"indices"`` and ``"values"``
            (assumed to be what the sparse encoder returns - TODO confirm).
        alpha: Weight in ``[0, 1]``.

    Returns:
        ``(scaled_dense, scaled_sparse)``.

    Raises:
        ValueError: If ``alpha`` is outside ``[0, 1]``.
    """
    if not 0 <= alpha <= 1:
        raise ValueError("alpha must be between 0 and 1")
    scaled_dense = [value * alpha for value in dense]
    scaled_sparse = {
        "indices": sparse["indices"],
        "values": [value * (1 - alpha) for value in sparse["values"]],
    }
    return scaled_dense, scaled_sparse


def hybrid_search(query, alpha, k, similarity_threshold):
    """Return the relevant documents for ``query`` from a hybrid index search.

    The query is encoded both densely and sparsely, the two encodings are
    weighted by ``alpha`` and sent to ``indexB`` in a single query.

    Args:
        query: User query text.
        alpha: Dense/sparse balance in ``[0, 1]`` (0 = keyword only,
            1 = semantic only).
        k: Maximum number of matches requested from the index (``top_k``).
        similarity_threshold: Minimum match score for a document to be kept.
            The score compared is the one returned by the index for the
            weighted query.

    Returns:
        List of decompressed document texts; an empty list when the search
        fails (the error is reported through ``st.error``).
    """
    try:
        # Dense vector for the query.
        query_vector = model.encode([query]).tolist()[0]

        # Sparse values for the query.
        sparse_query = sparse_encoder.encode_queries([query])[0]

        # The index query takes no weighting argument: the dense/sparse
        # balance is applied by scaling the two vectors before querying.
        dense_part, sparse_part = _scale_hybrid_query(query_vector, sparse_query, alpha)

        results = indexB.query(
            vector=dense_part,
            sparse_vector=sparse_part,
            top_k=k,
            include_metadata=True,
        )

        relevant_docs = []
        for match in results.get("matches", []):
            if "metadata" not in match or "compressed_text" not in match["metadata"]:
                print(f"Skipping match due to missing metadata or compressed_text: {match}")
                continue

            # Keep only matches that reach the requested score.
            if match.get("score", 0) >= similarity_threshold:
                relevant_docs.append(decompress_text(match["metadata"]["compressed_text"]))

        return relevant_docs

    except Exception as e:
        # UI boundary: surface the problem to the user and degrade to "no
        # documents" instead of crashing the app.
        st.error(f"Erreur lors de la recherche hybride : {e}")
        return []
|
| 156 |
|
|
|
|
|
|
|
|
|
|
| 157 |
|
| 158 |
def compress_text(text):
|
| 159 |
"""Compresse un texte en base64."""
|