fix HF Gradio error
app.py CHANGED
@@ -31,15 +31,13 @@ else:
 # --- Inference API client ---
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta", token=os.getenv("HF_TOKEN"))
 
-def respond(message, history
-    # 1. encode query and retrieve context
+def respond(message, history):
     query_embedding = embedding_model.encode([message], convert_to_numpy=True)
     faiss.normalize_L2(query_embedding)
     _, indices = index.search(query_embedding, k=5)
 
     context = "\n".join([f"{codes[i]}: {descriptions[i]}" for i in indices[0]])
 
-    # 2. prepare system prompt with role + retrieved context
     system_prompt = f"""You are an expert assistant specialized in tariff classification.
 Your job is to help users find the most appropriate tariff codes based on their description.
 Use only the provided context below to answer.
@@ -48,23 +46,22 @@ Context:
 {context}
 """
 
-    # 3. insert system message at the beginning
     messages = [{"role": "system", "content": system_prompt}]
     messages += history + [{"role": "user", "content": message}]
 
-
-
-    for message in client.chat_completion(
+    full_response = ""
+    for chunk in client.chat_completion(
         messages,
         max_tokens=512,
         stream=True,
         temperature=0.7,
         top_p=0.95,
     ):
-        token =
-
-
-
+        token = chunk.choices[0].delta.content
+        if token:
+            full_response += token
+            yield full_response.replace("\n", "\n\n")
+
 
 demo = gr.ChatInterface(
     respond, type="messages",