Spaces:
Sleeping
Sleeping
File size: 1,477 Bytes
f7b069f | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 | import re
from sklearn.metrics.pairwise import cosine_similarity
from core.config import collection, client_embedding, embedding_model
def get_query_embedding(query: str):
    """Return the embedding vector computed for ``query``.

    The text is sent to ``client_embedding.embeddings.create`` using the
    configured ``embedding_model``; the ``embedding`` attribute of the first
    item in the response's ``data`` is returned.

    Args:
        query: Text to embed.

    Returns:
        The embedding of the query as provided by the embedding client
        (presumably a sequence of floats -- confirm against the client API).
    """
    result = client_embedding.embeddings.create(input=query, model=embedding_model)
    # A single input was submitted, so only the first result entry is read.
    first_entry = result.data[0]
    return first_entry.embedding
# Whole-word "article" followed (optionally after whitespace) by a valid
# reference: "premier", its abbreviation "1er", or a run of ASCII digits.
# The trailing word boundary rejects tokens such as "12bis" or "premierement".
# Because only valid references can match, ``search`` skips earlier
# occurrences of "article" that are not followed by a number.
_ARTICLE_REF_RE = re.compile(r'\barticle\s*(premier|1er|[0-9]+)\b', re.IGNORECASE)


def extract_article_number(query: str):
    """Extract the article reference explicitly mentioned in ``query``.

    The match is case-insensitive and requires "article" to be a whole word,
    so words that merely end in "article" (e.g. "particle") are ignored.
    The first occurrence of "article" followed by a valid reference wins,
    even if an earlier "article" in the text is followed by another word.

    Args:
        query: Free-form user text.

    Returns:
        ``"Article premier"`` when the reference is "premier" or "1er",
        ``"Article <n>"`` (digits kept exactly as written) when it is a
        number, or ``None`` when no explicit reference is found.
    """
    match = _ARTICLE_REF_RE.search(query)
    if match is None:
        return None

    token = match.group(1).lower()
    # "1er" is the usual French abbreviation of "premier"; both map to the
    # same label.
    if token in ("premier", "1er"):
        return "Article premier"
    return f"Article {token}"
def find_relevant_articles(query: str, threshold: float = 0.8, max_articles: int = 10):
    """Find the stored articles most similar to ``query``.

    When ``extract_article_number`` recognises an explicit article reference
    and ``collection`` holds a document whose ``article_num`` equals it, that
    document alone is returned with a score of 1.0. Otherwise every document
    in ``collection`` that has a non-empty ``embedding2`` field is scored by
    cosine similarity against the query embedding.

    Args:
        query: User question or search text.
        threshold: Minimum cosine similarity (inclusive) for a document to
            be kept.
        max_articles: The ranked results are truncated with
            ``[:max_articles]``.

    Returns:
        A list of ``(document, similarity)`` tuples ordered by descending
        similarity; possibly empty.
    """
    explicit_ref = extract_article_number(query)
    if explicit_ref:
        exact_doc = collection.find_one({"article_num": explicit_ref})
        if exact_doc:
            # An explicitly requested article short-circuits the semantic search.
            return [(exact_doc, 1.0)]

    query_embedding = get_query_embedding(query)

    scored = []
    for candidate in collection.find():
        candidate_embedding = candidate.get("embedding2")
        if not candidate_embedding:
            # Documents without a stored embedding cannot be scored.
            continue
        score = cosine_similarity([query_embedding], [candidate_embedding])[0][0]
        if score >= threshold:
            scored.append((candidate, score))

    ranked = sorted(scored, key=lambda pair: pair[1], reverse=True)
    return ranked[:max_articles]
|