dxnxk committed on
Commit
bc1eeb8
·
1 Parent(s): 29ebbed
Files changed (1) hide show
  1. app.py +48 -27
app.py CHANGED
@@ -1,8 +1,9 @@
 
 
1
  import pandas as pd
2
  import numpy as np
3
- import gradio as gr
4
  import faiss
5
- import sys
6
  from sentence_transformers import SentenceTransformer
7
  from huggingface_hub import InferenceClient
8
 
@@ -12,40 +13,60 @@ df.columns = df.columns.str.strip()
12
  descriptions = df["brief_description"].astype(str).tolist()
13
  codes = df["hts8"].astype(str).tolist()
14
 
15
- # --- Create embeddings ---
16
  embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
17
- embeddings = embedding_model.encode(descriptions, convert_to_numpy=True)
18
-
19
- # --- FAISS index (cosine similarity) ---
20
- dim = embeddings.shape[1]
21
- faiss.normalize_L2(embeddings)
22
- index = faiss.IndexFlatIP(dim)
23
- index.add(embeddings)
24
 
25
- # --- Inference API ---
26
- client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.2")
 
 
 
 
 
 
 
 
 
27
 
28
- # --- RAG pipeline ---
29
- def generate_answer(user_query):
30
- sys.stderr.write("=== generate_answer called ===\n")
31
- sys.stderr.flush()
32
 
33
- query_embedding = embedding_model.encode([user_query], convert_to_numpy=True)
 
 
34
  faiss.normalize_L2(query_embedding)
35
  _, indices = index.search(query_embedding, k=5)
36
 
37
  context = "\n".join([f"{codes[i]}: {descriptions[i]}" for i in indices[0]])
38
- prompt = f"""Here are some tariff code descriptions:\n{context}\n\nQuestion: {user_query}\nAnswer:"""
39
 
40
- sys.stderr.write(f"Prompt sent to model:\n{prompt}\n")
41
- sys.stderr.flush()
 
 
42
 
43
- response = client.text_generation(
44
- prompt,
45
- max_new_tokens=200,
 
 
 
 
 
 
 
 
 
 
 
46
  temperature=0.7,
47
- stop_sequences=["\n\n"]
48
- )
49
- return response.strip()
 
 
 
 
 
50
 
51
- gr.Interface(fn=generate_answer, inputs="text", outputs="text").launch()
 
 
1
+ import os
2
+ import sys
3
  import pandas as pd
4
  import numpy as np
 
5
  import faiss
6
+ import gradio as gr
7
  from sentence_transformers import SentenceTransformer
8
  from huggingface_hub import InferenceClient
9
 
 
13
# Parallel lists over the CSV rows: codes[i] is the HTS-8 tariff code for
# descriptions[i]. Cast to str so retrieval formatting never sees NaN/floats.
# NOTE(review): `df` is loaded above this chunk — assumed to have
# "brief_description" and "hts8" columns; confirm against the CSV header.
descriptions = df["brief_description"].astype(str).tolist()
codes = df["hts8"].astype(str).tolist()

# --- Embedding model ---
# Sentence-transformer used for BOTH the corpus and the incoming queries,
# so vectors live in the same space.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
 
 
 
 
 
 
 
18
 
19
# --- Load or compute embeddings + FAISS index ---
# Both artifacts are cached on disk so app restarts skip the expensive
# re-encoding of every description.
_cache_ok = os.path.exists("embeddings.npy") and os.path.exists("faiss.index")
if _cache_ok:
    embeddings = np.load("embeddings.npy")
    index = faiss.read_index("faiss.index")
    # Guard against a stale cache: if the source CSV changed since the cache
    # was written, the saved vectors no longer line up with
    # `descriptions`/`codes`, silently corrupting retrieval — rebuild instead.
    if embeddings.shape[0] != len(descriptions) or index.ntotal != len(descriptions):
        _cache_ok = False
if not _cache_ok:
    embeddings = embedding_model.encode(descriptions, convert_to_numpy=True)
    # Normalize so inner-product search (IndexFlatIP) equals cosine similarity.
    faiss.normalize_L2(embeddings)
    index = faiss.IndexFlatIP(embeddings.shape[1])
    index.add(embeddings)
    np.save("embeddings.npy", embeddings)
    faiss.write_index(index, "faiss.index")
30
 
31
# --- Inference API client ---
# Hosted chat model used for answer generation (streamed via chat_completion
# in respond()). Relies on the Hugging Face Inference API being reachable;
# presumably an HF token is provided via the environment — confirm on deploy.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
 
33
 
34
def respond(message, history: list[dict]):
    """Stream a RAG answer for *message*.

    Retrieves the 5 most similar tariff-code descriptions from the FAISS
    index, injects them into a system prompt, and streams the chat model's
    reply as an incrementally growing assistant message (gradio
    ChatInterface "messages" format).

    Args:
        message: The user's latest query string.
        history: Prior turns as a list of {"role", "content"} dicts.

    Yields:
        The same assistant-message dict, with "content" growing per token.
    """
    # 1. Encode the query and retrieve the top-5 matching rows.
    query_embedding = embedding_model.encode([message], convert_to_numpy=True)
    # Normalize so inner-product search equals cosine similarity.
    faiss.normalize_L2(query_embedding)
    _, indices = index.search(query_embedding, k=5)

    context = "\n".join(f"{codes[i]}: {descriptions[i]}" for i in indices[0])

    # 2. Prepare system prompt with role + retrieved context.
    system_prompt = f"""You are an expert assistant specialized in tariff classification.
Your job is to help users find the most appropriate tariff codes based on their description.
Use only the provided context below to answer.

Context:
{context}
"""

    # 3. Full conversation: system message first, then history, then the query.
    messages = [{"role": "system", "content": system_prompt}]
    messages += history + [{"role": "user", "content": message}]

    response = {"role": "assistant", "content": ""}

    # Distinct loop variable: the original shadowed the `message` parameter
    # with each streamed chunk.
    for chunk in client.chat_completion(
        messages,
        max_tokens=512,
        stream=True,
        temperature=0.7,
        top_p=0.95,
    ):
        token = chunk.choices[0].delta.content
        # Streamed deltas can carry content=None (e.g. role-only or final
        # chunks); appending None to a str would raise TypeError.
        if token:
            response["content"] += token
        yield response
67
+
68
+
69
# Chat UI; type="messages" makes gradio pass/accept OpenAI-style role dicts,
# matching what respond() consumes and yields.
demo = gr.ChatInterface(respond, type="messages")

if __name__ == "__main__":
    demo.launch()