Upload folder using huggingface_hub
Browse files
- __pycache__/mps-api.cpython-310.pyc +0 -0
- app.py +22 -4
- mps-api.py +4 -6
__pycache__/mps-api.cpython-310.pyc
CHANGED
|
Binary files a/__pycache__/mps-api.cpython-310.pyc and b/__pycache__/mps-api.cpython-310.pyc differ
|
|
|
app.py
CHANGED
|
@@ -14,11 +14,12 @@ origins = {
|
|
| 14 |
'metier.format_court2']
|
| 15 |
}
|
| 16 |
|
| 17 |
-
def retrieve(origin='Formation', query='cuisine'):
|
| 18 |
# Query API
|
| 19 |
json = dict(
|
| 20 |
query=query,
|
| 21 |
-
origins=origins[origin]
|
|
|
|
| 22 |
)
|
| 23 |
|
| 24 |
resp = requests.post(url=api_url('retrieve'), json=json)
|
|
@@ -33,11 +34,28 @@ def retrieve(origin='Formation', query='cuisine'):
|
|
| 33 |
df['origin'] = df['origin'].apply(lambda x: x.split('.')[1])
|
| 34 |
return df
|
| 35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
gradio_app = gr.Interface(
|
| 37 |
fn=retrieve,
|
| 38 |
inputs=[
|
| 39 |
-
gr.Dropdown(list(origins.keys()), label="Origine", info="Choisir un type de donnée à interroger"),
|
| 40 |
-
gr.Textbox(label="Recherche", info="Votre recherche")
|
|
|
|
| 41 |
],
|
| 42 |
outputs=[
|
| 43 |
gr.DataFrame(label="Résultats", headers=["Distance", "Key", "Label", "Origin", "Document"])
|
|
|
|
| 14 |
'metier.format_court2']
|
| 15 |
}
|
| 16 |
|
| 17 |
+
def retrieve(origin='Formation', query='cuisine', n_results=20):
|
| 18 |
# Query API
|
| 19 |
json = dict(
|
| 20 |
query=query,
|
| 21 |
+
origins=origins[origin],
|
| 22 |
+
n_results=n_results
|
| 23 |
)
|
| 24 |
|
| 25 |
resp = requests.post(url=api_url('retrieve'), json=json)
|
|
|
|
| 34 |
df['origin'] = df['origin'].apply(lambda x: x.split('.')[1])
|
| 35 |
return df
|
| 36 |
|
| 37 |
+
def rank(query='cuisine', documents=None):
    """Ask the remote ``rank`` endpoint to order documents against a query.

    Args:
        query: Search text, sent as the ``query`` field of the JSON payload.
        documents: Documents sent as the ``documents`` field. ``None`` (the
            default) is sent as an empty list; a ``None`` sentinel is used
            instead of ``[]`` so no list object is shared between calls.

    Returns:
        The value stored under ``"ranking"`` in the decoded JSON response,
        or an empty list when the HTTP request fails or the response body
        is not valid JSON.

    Raises:
        KeyError: If the response decodes to a mapping without a
            ``"ranking"`` entry.
    """
    # Query API
    payload = dict(
        query=query,
        documents=[] if documents is None else documents,
    )

    try:
        resp = requests.post(url=api_url('rank'), json=payload)
        data = resp.json()
    except (requests.RequestException, ValueError):
        # Best effort: transport failures and undecodable bodies yield an
        # empty ranking. JSON decoding errors are ValueError subclasses.
        return []

    # Format result
    return data['ranking']
|
| 52 |
+
|
| 53 |
gradio_app = gr.Interface(
|
| 54 |
fn=retrieve,
|
| 55 |
inputs=[
|
| 56 |
+
gr.Dropdown(choices=list(origins.keys()), value=list(origins.keys())[0], label="Origine", info="Choisir un type de donnée à interroger"),
|
| 57 |
+
gr.Textbox(label="Recherche", info="Votre recherche"),
|
| 58 |
+
gr.Number(value=10, label="Nombre de résultats", info="Nombre de résultats attendus")
|
| 59 |
],
|
| 60 |
outputs=[
|
| 61 |
gr.DataFrame(label="Résultats", headers=["Distance", "Key", "Label", "Origin", "Document"])
|
mps-api.py
CHANGED
|
@@ -43,10 +43,10 @@ class VECTORDB:
|
|
| 43 |
print(f"{self.chroma_collection.count()} documents loaded.")
|
| 44 |
|
| 45 |
@method()
|
| 46 |
-
def search(self, query, origins):
|
| 47 |
results = self.chroma_collection.query(
|
| 48 |
query_texts=[query],
|
| 49 |
-
n_results=
|
| 50 |
where={"origin": {"$in": origins}},
|
| 51 |
include=['documents', 'metadatas', 'distances'])
|
| 52 |
|
|
@@ -69,10 +69,8 @@ class RANKING:
|
|
| 69 |
@method()
|
| 70 |
def rank(self, query, documents):
    """Score every document against ``query`` and return the score argsort.

    Args:
        query: Text paired with each document before scoring.
        documents: Iterable of documents to score.

    Returns:
        The result of ``np.argsort`` over the scores returned by
        ``self.cross_encoder.predict``, i.e. document indices in ascending
        score order.
    """
    # NOTE(review): ascending order puts the lowest-scored document first —
    # confirm this is the order callers expect.
    pairs = [[query, doc] for doc in documents]
    scores = self.cross_encoder.predict(pairs)
    return np.argsort(scores)
|
| 77 |
|
| 78 |
###########
|
|
@@ -85,7 +83,7 @@ def retrieve(query: Dict):
|
|
| 85 |
print(f"Retrieve query: {query}...")
|
| 86 |
|
| 87 |
# Searching documents
|
| 88 |
-
documents, metadatas, distances = VECTORDB().search.remote(query['query'], query['origins'])
|
| 89 |
|
| 90 |
return {"documents" : documents, "metadatas" : metadatas, "distances" : distances}
|
| 91 |
|
|
|
|
| 43 |
print(f"{self.chroma_collection.count()} documents loaded.")
|
| 44 |
|
| 45 |
@method()
|
| 46 |
+
def search(self, query, origins, n_results=10):
|
| 47 |
results = self.chroma_collection.query(
|
| 48 |
query_texts=[query],
|
| 49 |
+
n_results=n_results,
|
| 50 |
where={"origin": {"$in": origins}},
|
| 51 |
include=['documents', 'metadatas', 'distances'])
|
| 52 |
|
|
|
|
| 69 |
@method()
|
| 70 |
def rank(self, query, documents):
    """Return document indices ordered by cross-encoder score, as a list.

    Every document is paired with ``query`` and scored through
    ``self.cross_encoder.predict``; the ``np.argsort`` of those scores is
    converted to a plain Python list before being returned.
    """
    # NOTE(review): argsort is ascending, so the lowest-scored document
    # comes first — confirm this is the order callers expect.
    scored = self.cross_encoder.predict([[query, text] for text in documents])
    order = np.argsort(scored)
    return order.tolist()
|
| 75 |
|
| 76 |
###########
|
|
|
|
| 83 |
print(f"Retrieve query: {query}...")
|
| 84 |
|
| 85 |
# Searching documents
|
| 86 |
+
documents, metadatas, distances = VECTORDB().search.remote(query['query'], query['origins'], query['n_results'])
|
| 87 |
|
| 88 |
return {"documents" : documents, "metadatas" : metadatas, "distances" : distances}
|
| 89 |
|