import flask
from flask import Flask, request, jsonify
from langchain import PromptTemplate, LLMChain
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain import HuggingFaceHub
import os
from dotenv import load_dotenv
load_dotenv()
from huggingface_hub import InferenceClient
from langchain import HuggingFaceHub
import requests
import uuid
import sys

# Hugging Face credentials and model repository ids are read from the
# environment (load_dotenv() is called above, before these lookups).
hf_token = os.getenv('HUGGINGFACEHUB_API_TOKEN')
starchat_repo_id = os.getenv('starchat_repo_id')
llama2_repo_id = os.getenv('llama2_repo_id')

# Generation settings currently selected for Llama 2. The StarChat variant
# used repo_id=starchat_repo_id with min_length=512 and max_new_tokens=3072.
# NOTE(review): eos_token_id=49155 presumably dates from the StarChat setup --
# confirm it is meaningful for the Llama 2 model.
_generation_kwargs = {
    "min_length": 1024,
    "max_new_tokens": 5632,
    "do_sample": True,
    "temperature": 0.1,
    "top_k": 50,
    "top_p": 0.95,
    "eos_token_id": 49155,
}

# Hosted model client used by the chain below.
llm = HuggingFaceHub(
    repo_id=llama2_repo_id,
    huggingfacehub_api_token=hf_token,
    model_kwargs=_generation_kwargs,
)

# Prompt sent to the model: a system instruction followed by the user's
# question wrapped in [INST] ... [/INST]; {user_question} is the only
# template variable.
prompt_template = """
<<SYS>>You are a helpful, respectful and honest assistant. If you don't know the answer to a question, please don't share false information.In each conversation, question is placed after [INST] while your answer should be placed after [/INST].<</SYS>>
[INST] {user_question} [/INST]
assistant:
"""

# Chain that fills the template with the question and calls the LLM.
llm_chain = LLMChain(
    prompt=PromptTemplate.from_template(prompt_template),
    llm=llm,
)

# Flask application object; templates are looked up in the current directory.
app = Flask(__name__, template_folder="./")
@app.route('/', methods=['POST'])
def home_api():
    """Smoke-test endpoint confirming that the Flask app is deployed.

    Expects a JSON object body containing a ``user_question`` key; the value
    is only echoed to stdout.

    Returns:
        The fixed success message as JSON, or a JSON error with HTTP status
        400 when the body is not a JSON object or lacks ``user_question``.
    """
    # silent=True yields None instead of raising on a missing or malformed
    # JSON body, so every bad request gets the same JSON error below rather
    # than an unhandled KeyError/TypeError (HTTP 500).
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or 'user_question' not in data:
        return jsonify({
            'error': "Request body must be a JSON object with a 'user_question' field",
        }), 400

    print(data['user_question'])
    return {"Message":"Flask Home API Deploy Success on HF"}

@app.route('/api/chat', methods=['POST'])
def chat():
    """Answer a user question by running it through the LLM chain.

    Expects a JSON object body with a string ``user_question`` field. The
    view is deliberately synchronous: the original author noted that an
    ``async def`` view is not supported here.

    Returns:
        ``{"response": <model output>}`` as JSON on success;
        a JSON error with HTTP status 400 when the body is not a JSON object
        or ``user_question`` is missing or not a string;
        a JSON error with HTTP status 502 when the LLM call raises.
    """
    # silent=True yields None instead of raising on a missing or malformed
    # JSON body, so all bad requests are answered with the JSON error below
    # instead of an unhandled KeyError/TypeError (HTTP 500).
    data = request.get_json(silent=True)
    user_query = data.get('user_question') if isinstance(data, dict) else None
    if not isinstance(user_query, str):
        return jsonify({
            'error': "Request body must be a JSON object with a string 'user_question' field",
        }), 400

    print(f"API Input:{user_query}")

    try:
        initial_response = llm_chain.run(user_query)
    except Exception:
        # Request boundary: record the traceback and still answer with JSON,
        # because clients decode the response body as JSON.
        app.logger.exception("LLM chain call failed")
        return jsonify({'error': 'The language model request failed'}), 502

    print(f"API Call Output:{initial_response}")

    # The result must be returned as JSON: returning the bare string made the
    # client fail with "JSONDecodeError: Expecting value: line 1 column 1
    # (char 0)". Returning a plain dict or a pre-built jsonify() object was
    # also verified to work.
    return jsonify({'response': initial_response})
     
if __name__ == '__main__':
    # Serve on all interfaces at the fixed port 7860. Reading the port from
    # the PORT environment variable (defaulting to 7860) was tried as an
    # alternative and is not used.
    app.run(port=7860, host='0.0.0.0')
#Working!!!        

# Sample startup output from the Flask development server:
#  * Serving Flask app 'app'
#  * Debug mode: off
# WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.
#  * Running on all addresses (0.0.0.0)
#  * Running on http://127.0.0.1:7860
#  * Running on http://10.19.10.85:7860
# A Web Server Gateway Interface (WSGI) server runs Python code to serve a web application. See:
# https://wsgi.readthedocs.io/en/latest/what.html
# https://www.fullstackpython.com/wsgi-servers.html
# https://en.wikipedia.org/wiki/Web_Server_Gateway_Interface