Spaces:

large-traversaal
/

Alif-1.0-8B-Instruct

Sleeping

App Files Community

alishafique committed on Jul 5

Commit

a28de04

verified ·

1 Parent(s): 8f689f0

Update app.py

Browse files

Files changed (1) hide show

app.py +53 -19

app.py CHANGED Viewed

@@ -37,29 +37,63 @@ llama = Llama(
 #             text += content
 #             yield text
-def generate_response(message, history, system_prompt, temperature, max_new_tokens, top_k, repetition_penalty, top_p):
-    """Generates a streaming response from the Llama model."""
-    messages = [
-        {"role": "system", "content": "You are an Urdu Chatbot. Write an appropriate response for the given instruction."},
-    ]
-    # Add history and the current message
-    for user, bot in history:
-        messages.append({"role": "user", "content": user})
-        messages.append({"role": "assistant", "content": bot})
-    messages.append({"role": "user", "content": message})
-    response = llama.create_chat_completion(
-        messages=messages,
-        stream=True,
-    )
-    partial_message = ""
-    for part in response:
-        content = part["choices"][0]["delta"].get("content", "")
-        partial_message += content
-        yield partial_message
 # JavaScript function for `on_load`

 #             text += content
 #             yield text
+# def generate_response(message, history, system_prompt, temperature, max_new_tokens, top_k, repetition_penalty, top_p):
+#     """Generates a streaming response from the Llama model."""
+#     messages = [
+#         {"role": "system", "content": "You are an Urdu Chatbot. Write an appropriate response for the given instruction."},
+#     ]
+#     # Add history and the current message
+#     for user, bot in history:
+#         messages.append({"role": "user", "content": user})
+#         messages.append({"role": "assistant", "content": bot})
+#     messages.append({"role": "user", "content": message})
+#     response = llama.create_chat_completion(
+#         messages=messages,
+#         stream=True,
+#     )
+#     partial_message = ""
+#     for part in response:
+#         content = part["choices"][0]["delta"].get("content", "")
+#         partial_message += content
+#         yield partial_message
+def generate_response(message, history, system_prompt, temperature, max_new_tokens, top_k, repetition_penalty, top_p):
+    """Generates a streaming response from the Llama model using Alpaca chat template."""
+    # Start with system prompt
+    chat_prompt = system_prompt or "You are an Urdu Chatbot. Write an appropriate response for the given instruction."
+    chat_prompt += "\n"
+    # Add history to the prompt
+    for user, bot in history:
+        chat_prompt += f"\n### Instruction:\n{user}\n\n### Response:\n{bot}\n"
+    # Add current message
+    chat_prompt += f"\n### Instruction:\n{message}\n\n### Response:\n"
+    response = llama(
+        chat_prompt,
+        temperature=temperature,
+        max_tokens=max_new_tokens,
+        top_k=top_k,
+        repeat_penalty=repetition_penalty,
+        top_p=top_p,
+        stop=["###", "### Instruction:", "\n### Instruction:", "Q:"],
+        echo=False,
+        stream=True
+    )
+    text = ""
+    for chunk in response:
+        content = chunk["choices"][0]["text"]
+        if content:
+            text += content
+            yield text
 # JavaScript function for `on_load`