Spaces:
Sleeping
Sleeping
Commit ·
5a3850e
1
Parent(s): c88d20c
Use Mistral embeddings to match original index
Browse files- app.py +23 -25
- requirements.txt +2 -1
app.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
import os
|
| 2 |
import gradio as gr
|
|
|
|
| 3 |
from huggingface_hub import InferenceClient
|
| 4 |
from langchain_core.embeddings import Embeddings
|
| 5 |
from langchain_pinecone import PineconeVectorStore
|
|
@@ -13,36 +14,33 @@ from pinecone import Pinecone
|
|
| 13 |
DEFAULT_MODEL = "Qwen/Qwen2.5-72B-Instruct"
|
| 14 |
|
| 15 |
|
| 16 |
-
class
|
| 17 |
-
"""
|
| 18 |
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
self.
|
| 22 |
-
self.
|
| 23 |
|
| 24 |
-
def
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
|
|
|
|
|
|
| 35 |
|
| 36 |
def embed_documents(self, texts: list[str]) -> list[list[float]]:
|
| 37 |
-
|
| 38 |
-
for text in texts:
|
| 39 |
-
result = self.client.feature_extraction(text, model=self.model_name)
|
| 40 |
-
embeddings.append(self._to_float_list(result))
|
| 41 |
-
return embeddings
|
| 42 |
|
| 43 |
def embed_query(self, text: str) -> list[float]:
|
| 44 |
-
|
| 45 |
-
return self._to_float_list(result)
|
| 46 |
|
| 47 |
|
| 48 |
class ResearchParrot:
|
|
@@ -54,7 +52,7 @@ class ResearchParrot:
|
|
| 54 |
|
| 55 |
def embeddings(self):
|
| 56 |
if self._embeddings is None:
|
| 57 |
-
self._embeddings =
|
| 58 |
return self._embeddings
|
| 59 |
|
| 60 |
def vectorstore(self):
|
|
|
|
| 1 |
import os
|
| 2 |
import gradio as gr
|
| 3 |
+
import requests
|
| 4 |
from huggingface_hub import InferenceClient
|
| 5 |
from langchain_core.embeddings import Embeddings
|
| 6 |
from langchain_pinecone import PineconeVectorStore
|
|
|
|
| 14 |
DEFAULT_MODEL = "Qwen/Qwen2.5-72B-Instruct"
|
| 15 |
|
| 16 |
|
| 17 |
+
class MistralEmbeddings(Embeddings):
    """Embeddings produced by the Mistral embeddings HTTP endpoint.

    Uses the ``mistral-embed`` model so that vectors match the ones in the
    original index.
    """

    # Connect/read timeout (seconds) for the HTTP call. Without one,
    # ``requests`` waits indefinitely on a stalled connection.
    REQUEST_TIMEOUT_SECONDS = 60.0

    def __init__(self):
        """Read the API key from ``MISTRAL_API_KEY`` and set model and URL."""
        self.api_key = os.getenv("MISTRAL_API_KEY")
        self.model = "mistral-embed"
        self.url = "https://api.mistral.ai/v1/embeddings"

    def _get_embeddings(self, texts: list[str]) -> list[list[float]]:
        """POST ``texts`` to the embeddings endpoint and return the vectors.

        Args:
            texts: Strings to embed, sent in a single request.

        Returns:
            The ``"embedding"`` value of each item in the response's
            ``"data"`` list, in response order. An empty list when
            ``texts`` is empty.

        Raises:
            requests.HTTPError: If the API answers with an error status.
            requests.Timeout: If the API does not respond in time.
        """
        # Nothing to embed: skip the network round trip entirely.
        if not texts:
            return []

        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }
        data = {
            "model": self.model,
            "input": texts,
        }
        response = requests.post(
            self.url,
            headers=headers,
            json=data,
            timeout=self.REQUEST_TIMEOUT_SECONDS,
        )
        response.raise_for_status()
        result = response.json()
        return [item["embedding"] for item in result["data"]]

    def embed_documents(self, texts: list[str]) -> list[list[float]]:
        """Return one embedding vector per entry in ``texts``."""
        return self._get_embeddings(texts)

    def embed_query(self, text: str) -> list[float]:
        """Return the embedding vector for a single query string."""
        return self._get_embeddings([text])[0]
|
|
|
|
| 44 |
|
| 45 |
|
| 46 |
class ResearchParrot:
|
|
|
|
| 52 |
|
| 53 |
def embeddings(self):
|
| 54 |
if self._embeddings is None:
|
| 55 |
+
self._embeddings = MistralEmbeddings()
|
| 56 |
return self._embeddings
|
| 57 |
|
| 58 |
def vectorstore(self):
|
requirements.txt
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
gradio>=4.0.0
|
| 2 |
huggingface_hub>=0.20.0
|
| 3 |
langchain>=0.1.0
|
| 4 |
-
langchain-
|
| 5 |
langchain-pinecone>=0.1.0
|
| 6 |
pinecone-client>=3.0.0
|
|
|
|
|
|
| 1 |
gradio>=4.0.0
|
| 2 |
huggingface_hub>=0.20.0
|
| 3 |
langchain>=0.1.0
|
| 4 |
+
langchain-core>=0.1.0
|
| 5 |
langchain-pinecone>=0.1.0
|
| 6 |
pinecone-client>=3.0.0
|
| 7 |
+
requests>=2.28.0
|