abenkbp committed on
Commit
c76e3b4
·
verified ·
1 Parent(s): fb58ac0

Create chat.py

Browse files
Files changed (1) hide show
  1. data/chat.py +49 -0
data/chat.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import spaces
2
+ import os
3
+ import json
4
+ import argparse
5
+ from huggingface_hub import InferenceClient, login
6
+
7
+ api_key = os.getenv("UCODE_SECRET")
8
+ login(api_key)
9
+
10
+ # Initialize the InferenceClient with the specified model
11
+ client = InferenceClient("meta-llama/Meta-Llama-3-70B-Instruct")
12
+
13
+ @spaces.GPU()
14
+ def chat_completion(user_input, max_tokens, temperature, top_p):
15
+ try:
16
+ response = ""
17
+ for message in client.chat_completion(
18
+ user_input,
19
+ max_tokens=max_tokens,
20
+ stream=True,
21
+ temperature=temperature,
22
+ top_p=top_p,
23
+ ):
24
+ token = message.choices[0].delta.get("content", "")
25
+ response += token
26
+
27
+ return json.dumps({"status": "success", "output": response})
28
+ except Exception as e:
29
+ return json.dumps({"status": "error", "message": str(e)})
30
+
31
+ def main():
32
+ parser = argparse.ArgumentParser(description="Chat completion with Meta-Llama-3-70B-Instruct")
33
+ parser.add_argument("user_input", type=str, help="The input text for the chat model")
34
+ parser.add_argument("--max_tokens", type=int, default=50, help="Maximum number of tokens in the response")
35
+ parser.add_argument("--temperature", type=float, default=0.7, help="Sampling temperature")
36
+ parser.add_argument("--top_p", type=float, default=0.9, help="Top-p sampling value")
37
+
38
+ args = parser.parse_args()
39
+
40
+ result = chat_completion(
41
+ user_input=args.user_input,
42
+ max_tokens=args.max_tokens,
43
+ temperature=args.temperature,
44
+ top_p=args.top_p
45
+ )
46
+ print(result)
47
+
48
+ if __name__ == "__main__":
49
+ main()