abenkbp committed on
Commit
fab5e63
·
verified ·
1 Parent(s): 086a133

Update data/chat.py

Browse files
Files changed (1) hide show
  1. data/chat.py +17 -22
data/chat.py CHANGED
@@ -1,17 +1,26 @@
 
1
  import spaces
2
  import os
3
  import json
4
- import argparse
5
  from huggingface_hub import InferenceClient, login
6
 
 
 
 
 
7
  api_key = os.getenv("UCODE_SECRET")
8
  login(api_key)
9
-
10
- # Initialize the InferenceClient with the specified model
11
  client = InferenceClient("meta-llama/Meta-Llama-3-70B-Instruct")
12
 
 
13
  @spaces.GPU()
14
- def chat_completion(user_input, max_tokens, temperature, top_p):
 
 
 
 
 
 
15
  try:
16
  response = ""
17
  for message in client.chat_completion(
@@ -24,26 +33,12 @@ def chat_completion(user_input, max_tokens, temperature, top_p):
24
  token = message.choices[0].delta.get("content", "")
25
  response += token
26
 
27
- return json.dumps({"status": "success", "output": response})
28
  except Exception as e:
29
- return json.dumps({"status": "error", "message": str(e)})
30
 
31
  def main():
32
- parser = argparse.ArgumentParser(description="Chat completion with Meta-Llama-3-70B-Instruct")
33
- parser.add_argument("user_input", type=str, help="The input text for the chat model")
34
- parser.add_argument("--max_tokens", type=int, default=50, help="Maximum number of tokens in the response")
35
- parser.add_argument("--temperature", type=float, default=0.7, help="Sampling temperature")
36
- parser.add_argument("--top_p", type=float, default=0.9, help="Top-p sampling value")
37
-
38
- args = parser.parse_args()
39
-
40
- result = chat_completion(
41
- user_input=args.user_input,
42
- max_tokens=args.max_tokens,
43
- temperature=args.temperature,
44
- top_p=args.top_p
45
- )
46
- print(result)
47
 
48
  if __name__ == "__main__":
49
- main()
 
1
+ from flask import Flask, request, jsonify
2
  import spaces
3
  import os
4
  import json
 
5
  from huggingface_hub import InferenceClient, login
6
 
7
+ # Initialize Flask app
8
+ app = Flask(__name__)
9
+
10
+ # Load the API key and initialize the InferenceClient
11
  api_key = os.getenv("UCODE_SECRET")
12
  login(api_key)
 
 
13
  client = InferenceClient("meta-llama/Meta-Llama-3-70B-Instruct")
14
 
15
+ @app.route('/chat', methods=['POST'])
16
  @spaces.GPU()
17
+ def chat_completion():
18
+ data = request.json
19
+ user_input = data.get('user_input', '')
20
+ max_tokens = data.get('max_tokens', 512)
21
+ temperature = data.get('temperature', 0.7)
22
+ top_p = data.get('top_p', 0.95)
23
+
24
  try:
25
  response = ""
26
  for message in client.chat_completion(
 
33
  token = message.choices[0].delta.get("content", "")
34
  response += token
35
 
36
+ return jsonify({"status": "success", "output": response})
37
  except Exception as e:
38
+ return jsonify({"status": "error", "message": str(e)})
39
 
40
  def main():
41
+ app.run(host='localhost', port=3000)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
  if __name__ == "__main__":
44
+ main()