"""Flask front end for a LangChain LLMChain backed by a Hugging Face Hub model.

Routes:
    POST /          -- deployment smoke test; echoes the question to stdout.
    POST /api/chat  -- runs the question through the LLM chain and returns
                       ``{"response": <model output>}``.

Both routes expect a JSON object body with a ``user_question`` field.
"""

import os
import sys
import uuid

import flask
from flask import Flask, request, jsonify
from langchain import PromptTemplate, LLMChain
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain import HuggingFaceHub
from dotenv import load_dotenv

# Called at the same point as before (ahead of the huggingface_hub import) so
# the environment seen by the remaining imports and by the os.environ lookups
# below is unchanged.
load_dotenv()

from huggingface_hub import InferenceClient
import requests

hf_token = os.environ.get('HUGGINGFACEHUB_API_TOKEN')
starchat_repo_id = os.environ.get('starchat_repo_id')
llama2_repo_id = os.environ.get('llama2_repo_id')

# The Llama2 repo is the active model. Per the original notes, the StarChat
# alternative is repo_id=starchat_repo_id with "min_length": 512 and
# "max_new_tokens": 3072.
llm = HuggingFaceHub(
    repo_id=llama2_repo_id,
    huggingfacehub_api_token=hf_token,
    model_kwargs={
        "min_length": 1024,
        "max_new_tokens": 5632,
        "do_sample": True,
        "temperature": 0.1,
        "top_k": 50,
        "top_p": 0.95,
        # NOTE(review): value kept as-is; confirm it is the right
        # end-of-sequence id for the Llama2 model configured above.
        "eos_token_id": 49155,
    },
)

# NOTE(review): the bare "<>" markers are reproduced exactly as found; they may
# originally have been system-prompt delimiters -- confirm before changing.
prompt_template = """
<>You are a helpful, respectful and honest assistant. If you don't know the answer to a question, please don't share false information.In each conversation, question is placed after [INST] while your answer should be placed after [/INST].<>
[INST] {user_question} [/INST]
assistant:
"""

llm_chain = LLMChain(llm=llm, prompt=PromptTemplate.from_template(prompt_template))

app = flask.Flask(__name__, template_folder="./")


def _read_user_question():
    """Return the ``user_question`` field of the JSON request body.

    Returns None when the body is missing, is not valid JSON, is not a JSON
    object, or has no ``user_question`` key, so callers can answer with a JSON
    400 instead of letting a KeyError/TypeError surface as an HTML 500.
    """
    # silent=True: get None back for an unparseable body instead of an
    # exception being raised inside the handler.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return None
    return payload.get('user_question')


@app.route('/', methods=['POST'])
def home_api():
    """Smoke-test endpoint: print the question and report a successful deploy.

    Returns:
        The fixed success message as JSON, or a JSON error with status 400
        when ``user_question`` is absent from the request body.
    """
    user_query = _read_user_question()
    if user_query is None:
        return jsonify({"error": "Request body must be a JSON object with a 'user_question' field."}), 400
    print(user_query)
    return {"Message": "Flask Home API Deploy Success on HF"}


@app.route('/api/chat', methods=['POST'])
def chat():
    """Run the posted question through the LLM chain and return its answer.

    Kept synchronous: the original notes record that an ``async def`` handler
    was not supported in this deployment.

    Returns:
        ``{"response": <model output>}`` on success; a JSON error with status
        400 for a missing, non-string, or blank question; a JSON error with
        status 502 when the model call raises.
    """
    user_query = _read_user_question()
    if not isinstance(user_query, str) or not user_query.strip():
        return jsonify({"error": "'user_question' must be a non-empty string."}), 400

    print(f"API Input:{user_query}")
    try:
        initial_response = llm_chain.run(user_query)
    except Exception:
        # Request boundary: log the traceback and still answer in JSON, since
        # clients decode every reply as JSON.
        app.logger.exception("LLM chain call failed")
        return jsonify({"error": "The language model request failed."}), 502
    print(f"API Call Output:{initial_response}")

    # The reply must be JSON: returning the bare string made the client fail
    # with "JSONDecodeError: Expecting value: line 1 column 1 (char 0)".
    # Returning a plain dict, or building the jsonify() result before the
    # return statement, were both confirmed to work as well.
    return jsonify({'response': initial_response})


if __name__ == '__main__':
    # Alternative tried earlier: port=int(os.environ.get('PORT', 7860)).
    # The fixed port below is the configuration confirmed to work.
    app.run(host='0.0.0.0', port=7860)

# Observed startup output:
#  * Serving Flask app 'app'
#  * Debug mode: off
# WARNING: This is a development server. Do not use it in a production
# deployment. Use a production WSGI server instead.
#  * Running on all addresses (0.0.0.0)
#  * Running on http://127.0.0.1:7860
#  * Running on http://10.19.10.85:7860
#
# A Web Server Gateway Interface (WSGI) server runs Python code to create a
# web application.
# https://wsgi.readthedocs.io/en/latest/what.html
# https://www.fullstackpython.com/wsgi-servers.html
# https://en.wikipedia.org/wiki/Web_Server_Gateway_Interface