Spaces:

080-ai
/

cutlass_v1

Runtime error

ambrosfitz committed on May 2, 2024

Commit

aad58a2

verified ·

1 Parent(s): 6964303

Create app.py

Files changed (1) hide show

app.py ADDED Viewed

+import gradio as gr
+import os
+from openai import OpenAI
+import time
+# Initialize the OpenAI Client
+client = OpenAI(
+    api_key=os.environ.get("RUNPOD_API_KEY"),
+    base_url="https://api.runpod.ai/v2/vllm-k0g4c60zor9xuu/openai/v1",
+)
+def runpod_chat(question, history):
+    if not history:
+        history = []
+    history.append({"role": "user", "content": question})
+    response_stream = client.chat.completions.create(
+        model="ambrosfitz/llama-3-history",
+        messages=history,
+        temperature=0,
+        max_tokens=1028,
+        stream=True,
+    )
+    # Stream the response and add to history
+    responses = []
+    for message in response_stream:
+        response = message.choices[0].delta.content
+        responses.append(response)
+        history.append({"role": "assistant", "content": response})
+        time.sleep(0.3)  # Simulate typing delay
+        yield "RunPod: " + response
+# Set up the Gradio interface
+iface = gr.Interface(
+    fn=runpod_chat,
+    inputs=[
+        gr.inputs.Textbox(label="Enter your question:"),
+        gr.inputs.State(label="History")
+    ],
+    outputs="chat",
+    title="RunPod Chat",
+    description="This app interfaces with RunPod's API to provide responses to your queries."
+)
+iface.launch()