abenkbp committed on
Commit
fab5e63
·
verified ·
1 Parent(s): 086a133

Update data/chat.py

Browse files
Files changed (1) hide show
  1. data/chat.py +17 -22
data/chat.py CHANGED
@@ -1,17 +1,26 @@
 
1
  import spaces
2
  import os
3
  import json
4
- import argparse
5
  from huggingface_hub import InferenceClient, login
6
 
 
 
 
 
7
  api_key = os.getenv("UCODE_SECRET")
8
  login(api_key)
9
-
10
- # Initialize the InferenceClient with the specified model
11
  client = InferenceClient("meta-llama/Meta-Llama-3-70B-Instruct")
12
 
 
13
  @spaces.GPU()
14
- def chat_completion(user_input, max_tokens, temperature, top_p):
 
 
 
 
 
 
15
  try:
16
  response = ""
17
  for message in client.chat_completion(
@@ -24,26 +33,12 @@ def chat_completion(user_input, max_tokens, temperature, top_p):
24
  token = message.choices[0].delta.get("content", "")
25
  response += token
26
 
27
- return json.dumps({"status": "success", "output": response})
28
  except Exception as e:
29
- return json.dumps({"status": "error", "message": str(e)})
30
 
31
  def main():
32
- parser = argparse.ArgumentParser(description="Chat completion with Meta-Llama-3-70B-Instruct")
33
- parser.add_argument("user_input", type=str, help="The input text for the chat model")
34
- parser.add_argument("--max_tokens", type=int, default=50, help="Maximum number of tokens in the response")
35
- parser.add_argument("--temperature", type=float, default=0.7, help="Sampling temperature")
36
- parser.add_argument("--top_p", type=float, default=0.9, help="Top-p sampling value")
37
-
38
- args = parser.parse_args()
39
-
40
- result = chat_completion(
41
- user_input=args.user_input,
42
- max_tokens=args.max_tokens,
43
- temperature=args.temperature,
44
- top_p=args.top_p
45
- )
46
- print(result)
47
 
48
  if __name__ == "__main__":
49
- main()
 
1
+ from flask import Flask, request, jsonify
2
  import spaces
3
  import os
4
  import json
 
5
  from huggingface_hub import InferenceClient, login
6
 
7
+ # Initialize Flask app
8
+ app = Flask(__name__)
9
+
10
+ # Load the API key and initialize the InferenceClient
11
  api_key = os.getenv("UCODE_SECRET")
12
  login(api_key)
 
 
13
  client = InferenceClient("meta-llama/Meta-Llama-3-70B-Instruct")
14
 
15
+ @app.route('/chat', methods=['POST'])
16
  @spaces.GPU()
17
+ def chat_completion():
18
+ data = request.json
19
+ user_input = data.get('user_input', '')
20
+ max_tokens = data.get('max_tokens', 512)
21
+ temperature = data.get('temperature', 0.7)
22
+ top_p = data.get('top_p', 0.95)
23
+
24
  try:
25
  response = ""
26
  for message in client.chat_completion(
 
33
  token = message.choices[0].delta.get("content", "")
34
  response += token
35
 
36
+ return jsonify({"status": "success", "output": response})
37
  except Exception as e:
38
+ return jsonify({"status": "error", "message": str(e)})
39
 
40
  def main():
41
+ app.run(host='localhost', port=3000)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
  if __name__ == "__main__":
44
+ main()