import os

import requests

import gradio as gr

# SECURITY(review): a Databricks personal-access token was previously hardcoded
# here (and a Perplexity API key lived in commented-out code). Both must be
# treated as leaked — rotate them. Prefer supplying the token via the
# DATABRICKS_TOKEN environment variable; the literal fallback is kept only for
# backward compatibility and should be removed once rotation is done.
DATABRICKS_TOKEN = os.environ.get(
    "DATABRICKS_TOKEN", "dapi8239fa5017885e5642900c4ad4f0f4e3"
)
ENDPOINT_URL = (
    "https://dbc-6a78cc1b-db54.cloud.databricks.com"
    "/serving-endpoints/dbdemos_endpoint_advanced_may_rag_chatbot/invocations"
)


def predict(message, history):
    """Yield the chatbot reply for *message* from the Databricks RAG endpoint.

    Gradio ChatInterface callback: *message* is the user's latest input,
    *history* is the prior chat turns (currently unused — each request is
    sent without conversation context).

    Yields:
        str: the model's answer text.

    Raises:
        Exception: if the serving endpoint returns a non-200 status.
    """
    if message is None:
        return

    headers = {"Authorization": f"Bearer {DATABRICKS_TOKEN}"}
    payload = {"inputs": [{"messages": [{"content": message, "role": "user"}]}]}

    response = requests.post(ENDPOINT_URL, headers=headers, json=payload)
    if response.status_code != 200:
        raise Exception(
            f"Request failed with status {response.status_code}, {response.text}"
        )

    # Parse the JSON body once; .json() returns a plain dict, so the fields
    # must be accessed by subscript (the old attribute-style access
    # `response.json().predictions[0].result` raised AttributeError).
    body = response.json()
    print(body)  # debug: full endpoint response
    yield body["predictions"][0]["result"]


if __name__ == "__main__":
    gr.ChatInterface(predict).launch()