File size: 1,872 Bytes
2812f81
3a05cea
bda9123
37b33c6
 
3a05cea
37b33c6
 
 
dfa418b
37b33c6
 
 
 
2812f81
37b33c6
 
 
 
5e8df2b
37b33c6
 
 
 
 
5e8df2b
37b33c6
5e8df2b
32eda81
3d20892
6203dbe
3d20892
6203dbe
3d20892
 
 
 
58ac128
fc67f99
 
3d20892
 
f6762ce
544cfcd
3d20892
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
from openai import OpenAI
import gradio as gr
import requests
# api_key = "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"
# client = OpenAI(api_key=api_key, base_url="https://api.perplexity.ai")

# def predict(message, history):
    # history_openai_format = []
    # #history_openai_format.append({"role": "system", "content": "Answer the questions in japanese only"})
    
    # #for human, assistant in history:
    # #    history_openai_format.append({"role": "user", "content": human })
    # #    history_openai_format.append({"role": "assistant", "content":assistant})
    # history_openai_format.append({"role": "user", "content": message})
  
    # response = client.chat.completions.create(model='sonar-small-chat',
    # messages= history_openai_format,
    # temperature=1.0,
    # stream=True)

    # partial_message = ""
    # for chunk in response:
        # if chunk.choices[0].delta.content is not None:
            # partial_message = partial_message + chunk.choices[0].delta.content
            # yield partial_message

# gr.ChatInterface(predict).launch()


import mlflow.deployments

def predict(message, history):
    """Send a user message to a Databricks model-serving endpoint and yield the reply.

    Parameters
    ----------
    message : str
        The user's chat message from gr.ChatInterface. Must not be None.
    history : list
        Prior chat turns supplied by gr.ChatInterface (currently unused —
        only the latest message is sent, so the bot has no conversation memory).

    Yields
    ------
    str
        The model's answer text (``predictions[0]['result']`` of the JSON response).

    Raises
    ------
    ValueError
        If ``message`` is None. (Previously a None message caused a NameError,
        because ``data_json`` was only assigned inside the ``if`` branch but
        used unconditionally afterwards.)
    Exception
        If the endpoint responds with a non-200 status code.
    """
    # SECURITY NOTE(review): hardcoded credential checked into source — this
    # token should be loaded from an environment variable or a secrets store
    # and the committed value revoked.
    databricks_token = "dapi8239fa5017885e5642900c4ad4f0f4e3"
    endpoint = "https://dbc-6a78cc1b-db54.cloud.databricks.com/serving-endpoints/dbdemos_endpoint_advanced_may_rag_chatbot/invocations"
    headers = {'Authorization': f'Bearer {databricks_token}'}

    # Bug fix: fail fast on a missing message instead of crashing later with
    # an undefined ``data_json``.
    if message is None:
        raise ValueError("message must not be None")
    data_json = {'inputs': [{'messages': [{'content': message, 'role': 'user'}]}]}

    response = requests.post(endpoint, headers=headers, json=data_json)
    if response.status_code != 200:
        raise Exception(f'Request failed with status {response.status_code}, {response.text}')
    # Yield (rather than return) so gr.ChatInterface treats this as a
    # streaming-style generator, matching the original behavior.
    yield response.json()['predictions'][0]['result']
    
    
# Launch the Gradio chat UI wired to ``predict``.
# NOTE(review): this runs at import time; consider guarding it with
# ``if __name__ == "__main__":`` so importing this module doesn't start a server.
gr.ChatInterface(predict).launch()