Spaces:

binqiangliu
/

flask_inference_api

Runtime error

App Files Files Community

flask_inference_api / app.py

binqiangliu

Update app.py

2faf40a over 2 years ago

raw

history blame contribute delete

3.5 kB

	import flask
	from flask import Flask, request, jsonify
	from langchain import PromptTemplate, LLMChain
	from langchain.chains import LLMChain
	from langchain.prompts import PromptTemplate
	from langchain.memory import ConversationBufferMemory
	from langchain import HuggingFaceHub
	import os
	from dotenv import load_dotenv
	load_dotenv()
	from huggingface_hub import InferenceClient
	from langchain import HuggingFaceHub
	import requests
	import uuid
	import sys

	# Credentials and model identifiers come from the process environment
	# (load_dotenv() above also loads them from a local .env file).
	# Each value is None when the corresponding variable is not set.
	# A 'port' variable used to be read here as well; it is no longer used.
	hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
	llama2_repo_id = os.getenv("llama2_repo_id")
	starchat_repo_id = os.getenv("starchat_repo_id")

	# Hugging Face Hub text-generation client, configured for Llama 2.
	# An earlier StarChat setup used repo_id=starchat_repo_id together with
	# min_length=512 and max_new_tokens=3072 instead of the values below.
	# NOTE(review): eos_token_id=49155 may be a leftover from the StarChat
	# setup -- confirm it is a valid end-of-sequence id for the Llama 2 model.
	llm = HuggingFaceHub(
	    repo_id=llama2_repo_id,
	    huggingfacehub_api_token=hf_token,
	    model_kwargs=dict(
	        min_length=1024,
	        max_new_tokens=5632,
	        do_sample=True,
	        temperature=0.1,
	        top_k=50,
	        top_p=0.95,
	        eos_token_id=49155,
	    ),
	)

	# Prompt sent to the model: a fixed system instruction followed by the
	# caller's question, substituted for the {user_question} placeholder.
	prompt_template = """
	<<SYS>>You are a helpful, respectful and honest assistant. If you don't know the answer to a question, please don't share false information.In each conversation, question is placed after [INST] while your answer should be placed after [/INST].<</SYS>>
	[INST] {user_question} [/INST]
	assistant:
	"""

	# Bind the template to the LLM so a question can be answered with a single
	# llm_chain.run(question) call.
	_chat_prompt = PromptTemplate.from_template(prompt_template)
	llm_chain = LLMChain(llm=llm, prompt=_chat_prompt)

	# The Flask application object; templates are looked up in the current
	# directory ("./") rather than Flask's default template folder.
	app = Flask(__name__, template_folder="./")
	@app.route('/', methods=['POST'])
	def home_api():
	    """Deployment smoke-test endpoint.

	    Expects a JSON object body containing a ``user_question`` key, prints
	    that value to stdout and answers with a fixed success message.

	    Returns:
	        The success payload, or a JSON error body with HTTP 400 when the
	        request body is not a JSON object or has no ``user_question`` key.
	    """
	    # silent=True makes get_json() return None for a missing or malformed
	    # JSON body instead of aborting, so the failure is reported as JSON
	    # below rather than as an unhandled KeyError/TypeError (HTTP 500).
	    data = request.get_json(silent=True)
	    if not isinstance(data, dict) or 'user_question' not in data:
	        return {"error": "Request body must be a JSON object with a 'user_question' field."}, 400

	    print(data['user_question'])
	    return {"Message":"Flask Home API Deploy Success on HF"}

	@app.route('/api/chat', methods=['POST'])
	def chat():
	    """Answer a user question with the LLM chain.

	    Expects a JSON object body with a string ``user_question`` field, runs
	    it through ``llm_chain`` and returns the generated text.

	    Returns:
	        ``{"response": <text>}`` on success;
	        ``{"error": ...}`` with HTTP 400 when the body is not a JSON object
	        or ``user_question`` is missing / not a string;
	        ``{"error": ...}`` with HTTP 502 when the LLM call itself fails.

	    Every path answers with JSON: clients decode the body as JSON, and a
	    plain-text reply made them fail with
	    "JSONDecodeError: Expecting value: line 1 column 1 (char 0)".
	    """
	    # Kept as a synchronous view: an `async def` variant was tried and is
	    # not supported in this setup.
	    # silent=True returns None for a missing or malformed JSON body instead
	    # of aborting, so the problem can be reported as JSON below.
	    data = request.get_json(silent=True)
	    user_query = data.get('user_question') if isinstance(data, dict) else None
	    if not isinstance(user_query, str):
	        return jsonify({'error': "Request body must be a JSON object with a string 'user_question' field."}), 400
	    print("API Input:"+user_query)

	    try:
	        initial_response = llm_chain.run(user_query)
	    except Exception:
	        # Top-level request boundary: log the traceback and keep the JSON
	        # contract instead of letting Flask emit an HTML 500 page.
	        app.logger.exception("LLM call failed")
	        return jsonify({'error': 'The language model request failed.'}), 502
	    print("API Call Output:"+initial_response)

	    # Returning a plain dict, or calling jsonify() before the return
	    # statement, were both verified to work as well.
	    return jsonify({'response': initial_response})

	if __name__ == '__main__':
	    # Start Flask's built-in development server on all interfaces, port 7860.
	    # A variant that read the port from the PORT environment variable
	    # (defaulting to 7860) was tried earlier; the fixed port is what is used.
	    #
	    # Flask warns at startup that this is a development server and that a
	    # production WSGI server should be used for real deployments.
	    # A Web Server Gateway Interface (WSGI) server runs Python code to
	    # create a web application. Background reading:
	    #   https://wsgi.readthedocs.io/en/latest/what.html
	    #   https://www.fullstackpython.com/wsgi-servers.html
	    #   https://en.wikipedia.org/wiki/Web_Server_Gateway_Interface
	    server_options = {"host": "0.0.0.0", "port": 7860}
	    app.run(**server_options)