abenkbp committed on
Commit
c76e3b4
·
verified ·
1 Parent(s): fb58ac0

Create chat.py

Browse files
Files changed (1) hide show
  1. data/chat.py +49 -0
data/chat.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import spaces
2
+ import os
3
+ import json
4
+ import argparse
5
+ from huggingface_hub import InferenceClient, login
6
+
7
+ api_key = os.getenv("UCODE_SECRET")
8
+ login(api_key)
9
+
10
+ # Initialize the InferenceClient with the specified model
11
+ client = InferenceClient("meta-llama/Meta-Llama-3-70B-Instruct")
12
+
13
+ @spaces.GPU()
14
+ def chat_completion(user_input, max_tokens, temperature, top_p):
15
+ try:
16
+ response = ""
17
+ for message in client.chat_completion(
18
+ user_input,
19
+ max_tokens=max_tokens,
20
+ stream=True,
21
+ temperature=temperature,
22
+ top_p=top_p,
23
+ ):
24
+ token = message.choices[0].delta.get("content", "")
25
+ response += token
26
+
27
+ return json.dumps({"status": "success", "output": response})
28
+ except Exception as e:
29
+ return json.dumps({"status": "error", "message": str(e)})
30
+
31
+ def main():
32
+ parser = argparse.ArgumentParser(description="Chat completion with Meta-Llama-3-70B-Instruct")
33
+ parser.add_argument("user_input", type=str, help="The input text for the chat model")
34
+ parser.add_argument("--max_tokens", type=int, default=50, help="Maximum number of tokens in the response")
35
+ parser.add_argument("--temperature", type=float, default=0.7, help="Sampling temperature")
36
+ parser.add_argument("--top_p", type=float, default=0.9, help="Top-p sampling value")
37
+
38
+ args = parser.parse_args()
39
+
40
+ result = chat_completion(
41
+ user_input=args.user_input,
42
+ max_tokens=args.max_tokens,
43
+ temperature=args.temperature,
44
+ top_p=args.top_p
45
+ )
46
+ print(result)
47
+
48
+ if __name__ == "__main__":
49
+ main()