import os

import requests

import gradio as gr

# SECURITY(review): a Databricks personal-access token was previously hardcoded
# here (and a Perplexity API key lived in commented-out code). Both must be
# treated as leaked — rotate them. Prefer supplying the token via the
# DATABRICKS_TOKEN environment variable; the literal fallback is kept only for
# backward compatibility and should be removed once rotation is done.
DATABRICKS_TOKEN = os.environ.get(
    "DATABRICKS_TOKEN", "dapi8239fa5017885e5642900c4ad4f0f4e3"
)
ENDPOINT_URL = (
    "https://dbc-6a78cc1b-db54.cloud.databricks.com"
    "/serving-endpoints/dbdemos_endpoint_advanced_may_rag_chatbot/invocations"
)


def predict(message, history):
    """Yield the chatbot reply for *message* from the Databricks RAG endpoint.

    Gradio ChatInterface callback: *message* is the user's latest input,
    *history* is the prior chat turns (currently unused — each request is
    sent without conversation context).

    Yields:
        str: the model's answer text.

    Raises:
        Exception: if the serving endpoint returns a non-200 status.
    """
    if message is None:
        return

    headers = {"Authorization": f"Bearer {DATABRICKS_TOKEN}"}
    payload = {"inputs": [{"messages": [{"content": message, "role": "user"}]}]}

    response = requests.post(ENDPOINT_URL, headers=headers, json=payload)
    if response.status_code != 200:
        raise Exception(
            f"Request failed with status {response.status_code}, {response.text}"
        )

    # Parse the JSON body once; .json() returns a plain dict, so the fields
    # must be accessed by subscript (the old attribute-style access
    # `response.json().predictions[0].result` raised AttributeError).
    body = response.json()
    print(body)  # debug: full endpoint response
    yield body["predictions"][0]["result"]


if __name__ == "__main__":
    gr.ChatInterface(predict).launch()