Spaces:
Sleeping
Sleeping
Commit ·
45bb735
1
Parent(s): 7ccaecf
removing cohere
Browse files
- app.py +10 -26
- requirements.txt +0 -1
app.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
from huggingface_hub import InferenceClient
|
| 3 |
import os
|
| 4 |
-
import cohere
|
| 5 |
import faiss
|
| 6 |
from transformers import pipeline
|
| 7 |
from sentence_transformers import SentenceTransformer
|
|
@@ -23,10 +22,8 @@ index = faiss.IndexFlatL2(document_embeddings_np.shape[1])
|
|
| 23 |
index.add(document_embeddings_np)
|
| 24 |
|
| 25 |
|
| 26 |
-
client = InferenceClient("meta-llama/Llama-3.2-B-Instruct")
|
| 27 |
COHERE_API_KEY = os.getenv("COHERE_API_KEY")
|
| 28 |
-
client_cohere = cohere.Client(COHERE_API_KEY)
|
| 29 |
-
COHERE_MODEL = "command-r-plus"
|
| 30 |
|
| 31 |
def respond(
|
| 32 |
message,
|
|
@@ -35,7 +32,6 @@ def respond(
|
|
| 35 |
max_tokens,
|
| 36 |
temperature,
|
| 37 |
top_p,
|
| 38 |
-
use_cohere_api,
|
| 39 |
):
|
| 40 |
|
| 41 |
query_embedding = embedding_model.encode([message], convert_to_tensor=True)
|
|
@@ -56,27 +52,16 @@ def respond(
|
|
| 56 |
|
| 57 |
response = ""
|
| 58 |
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
response
|
| 68 |
yield response
|
| 69 |
-
else:
|
| 70 |
-
for message in client.chat_completion(
|
| 71 |
-
messages,
|
| 72 |
-
max_tokens=max_tokens,
|
| 73 |
-
stream=True,
|
| 74 |
-
temperature=temperature,
|
| 75 |
-
top_p=top_p,
|
| 76 |
-
):
|
| 77 |
-
token = message.choices[0].delta.content
|
| 78 |
-
response += token
|
| 79 |
-
yield response
|
| 80 |
|
| 81 |
demo = gr.ChatInterface(
|
| 82 |
respond,
|
|
@@ -91,7 +76,6 @@ demo = gr.ChatInterface(
|
|
| 91 |
step=0.05,
|
| 92 |
label="Top-p (nucleus sampling)",
|
| 93 |
),
|
| 94 |
-
gr.Checkbox(label="Use Cohere API."),
|
| 95 |
],
|
| 96 |
)
|
| 97 |
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
from huggingface_hub import InferenceClient
|
| 3 |
import os
|
|
|
|
| 4 |
import faiss
|
| 5 |
from transformers import pipeline
|
| 6 |
from sentence_transformers import SentenceTransformer
|
|
|
|
| 22 |
index.add(document_embeddings_np)
|
| 23 |
|
| 24 |
|
| 25 |
+
client = InferenceClient("meta-llama/Llama-3.2-B-Instruct")
|
| 26 |
COHERE_API_KEY = os.getenv("COHERE_API_KEY")
|
|
|
|
|
|
|
| 27 |
|
| 28 |
def respond(
|
| 29 |
message,
|
|
|
|
| 32 |
max_tokens,
|
| 33 |
temperature,
|
| 34 |
top_p,
|
|
|
|
| 35 |
):
|
| 36 |
|
| 37 |
query_embedding = embedding_model.encode([message], convert_to_tensor=True)
|
|
|
|
| 52 |
|
| 53 |
response = ""
|
| 54 |
|
| 55 |
+
for message in client.chat_completion(
|
| 56 |
+
messages,
|
| 57 |
+
max_tokens=max_tokens,
|
| 58 |
+
stream=True,
|
| 59 |
+
temperature=temperature,
|
| 60 |
+
top_p=top_p,
|
| 61 |
+
):
|
| 62 |
+
token = message.choices[0].delta.content
|
| 63 |
+
response += token
|
| 64 |
yield response
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
|
| 66 |
demo = gr.ChatInterface(
|
| 67 |
respond,
|
|
|
|
| 76 |
step=0.05,
|
| 77 |
label="Top-p (nucleus sampling)",
|
| 78 |
),
|
|
|
|
| 79 |
],
|
| 80 |
)
|
| 81 |
|
requirements.txt
CHANGED
|
@@ -1,4 +1,3 @@
|
|
| 1 |
huggingface_hub==0.25.2
|
| 2 |
-
cohere
|
| 3 |
faiss
|
| 4 |
sentence_transformers
|
|
|
|
| 1 |
huggingface_hub==0.25.2
|
|
|
|
| 2 |
faiss
|
| 3 |
sentence_transformers
|