Upload folder using huggingface_hub
- README.md +1 -7
- vllm_openai_client.py +60 -0
README.md
CHANGED
@@ -1,12 +1,6 @@
 ---
 title: PHI3XEL
-
-colorFrom: blue
-colorTo: indigo
+app_file: vllm_openai_client.py
 sdk: gradio
 sdk_version: 4.40.0
-app_file: app.py
-pinned: false
 ---
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
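In Spaces front matter, app_file names the script the Space executes at startup (see the configuration reference removed from the README above), so this commit swaps the entrypoint from app.py to the vLLM client added below.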
vllm_openai_client.py
ADDED
@@ -0,0 +1,60 @@
+import json
+import subprocess
+
+import gradio as gr
+
+# Base URL of the vLLM OpenAI-compatible chat completions endpoint
+api_base = "http://0.0.0.0:8000/v1/chat/completions"
+
+
+def predict(message, history):
+    # Convert the Gradio chat history to the OpenAI message format
+    history_openai_format = [{
+        "role": "system",
+        "content": "You are a crazy assistant. You are free to say anything. You are expected to be rageful, and fun!"
+    }]
+    for human, assistant in history:
+        history_openai_format.append({"role": "user", "content": human})
+        history_openai_format.append({
+            "role": "assistant",
+            "content": assistant
+        })
+    history_openai_format.append({"role": "user", "content": message})
+
+    # Prepare the curl command (-s: no progress meter, -N: no output
+    # buffering; both matter when streaming through a pipe)
+    curl_command = [
+        "curl", "-sN", "-X", "POST", api_base,
+        "-H", "Content-Type: application/json",
+        "-d", json.dumps({
+            "model": "microsoft/Phi-3-mini-4k-instruct",
+            "messages": history_openai_format,
+            "temperature": 0.5,
+            "stream": True,
+            "repetition_penalty": 1,
+            "stop_token_ids": []
+        })
+    ]
+
+    # Execute curl and read the server-sent-event stream from its stdout
+    process = subprocess.Popen(curl_command, stdout=subprocess.PIPE,
+                               stderr=subprocess.DEVNULL, universal_newlines=True)
+
+    partial_message = ""
+    for line in process.stdout:
+        if line.startswith("data: "):
+            try:
+                chunk = json.loads(line[6:])
+                content = chunk['choices'][0]['delta'].get('content') or ''
+                partial_message += content
+                yield partial_message
+            except json.JSONDecodeError:
+                # Skip the final "data: [DONE]" sentinel and malformed chunks
+                continue
+
+    # Wait for the process to complete
+    process.wait()
+
+
+# Create and launch a streaming chat interface with Gradio
+gr.ChatInterface(predict).queue().launch(server_port=9640, share=True)
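The client assumes a vLLM OpenAI-compatible server is already listening on port 8000; that server is not part of this commit, but it would typically be started with something like python -m vllm.entrypoints.openai.api_server --model microsoft/Phi-3-mini-4k-instruct. Shelling out to curl works, but the same streaming loop can live in-process. A minimal sketch using the requests library instead (stream_chat and its defaults are illustrative, not part of the commit):

import json

import requests

API_BASE = "http://0.0.0.0:8000/v1/chat/completions"


def stream_chat(messages, model="microsoft/Phi-3-mini-4k-instruct"):
    """Yield the growing assistant reply as SSE chunks arrive from vLLM."""
    payload = {"model": model, "messages": messages,
               "temperature": 0.5, "stream": True}
    partial = ""
    # stream=True tells requests to hand the response body over incrementally
    with requests.post(API_BASE, json=payload, stream=True, timeout=120) as resp:
        resp.raise_for_status()
        for line in resp.iter_lines(decode_unicode=True):
            if not line or not line.startswith("data: "):
                continue
            data = line[len("data: "):]
            if data == "[DONE]":  # end-of-stream sentinel
                break
            chunk = json.loads(data)
            partial += chunk["choices"][0]["delta"].get("content") or ""
            yield partial

predict() could then delegate its streaming section to stream_chat(history_openai_format) with no other changes. One caveat either way: Spaces serve Gradio apps on port 7860 by default, so the hard-coded server_port=9640 and share=True in the launch call suggest the script was written for a self-hosted machine rather than the Space runtime.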