Spaces:
Runtime error
Runtime error
| import flask | |
| from flask import Flask, request, jsonify | |
| from langchain import PromptTemplate, LLMChain | |
| from langchain.chains import LLMChain | |
| from langchain.prompts import PromptTemplate | |
| from langchain.memory import ConversationBufferMemory | |
| from langchain import HuggingFaceHub | |
| import os | |
| from dotenv import load_dotenv | |
| load_dotenv() | |
| from huggingface_hub import InferenceClient | |
| from langchain import HuggingFaceHub | |
| import requests | |
| import uuid | |
| import sys | |
# Credentials and model repository ids come from the process environment
# (load_dotenv() is called earlier in this file).
hf_token = os.getenv('HUGGINGFACEHUB_API_TOKEN')
starchat_repo_id = os.getenv('starchat_repo_id')
llama2_repo_id = os.getenv('llama2_repo_id')

# Generation settings currently tuned for Llama2. The StarChat variant noted
# by the author used min_length=512 and max_new_tokens=3072 instead.
# NOTE(review): eos_token_id=49155 carries no per-model note in the original;
# presumably it was carried over from the StarChat setup -- confirm it matches
# the Llama2 tokenizer.
# NOTE(review): confirm max_new_tokens=5632 and min_length=1024 fit the
# context window of the model named by llama2_repo_id.
_llama2_model_kwargs = {
    "min_length": 1024,
    "max_new_tokens": 5632,
    "do_sample": True,
    "temperature": 0.1,
    "top_k": 50,
    "top_p": 0.95,
    "eos_token_id": 49155,
}

# Remote LLM handle; pass repo_id=starchat_repo_id to target StarChat instead.
llm = HuggingFaceHub(
    repo_id=llama2_repo_id,
    huggingfacehub_api_token=hf_token,
    model_kwargs=_llama2_model_kwargs,
)

# Prompt with a single placeholder, {user_question}, wrapped in [INST] tags.
prompt_template = """
<<SYS>>You are a helpful, respectful and honest assistant. If you don't know the answer to a question, please don't share false information.In each conversation, question is placed after [INST] while your answer should be placed after [/INST].<</SYS>>
[INST] {user_question} [/INST]
assistant:
"""

_chat_prompt = PromptTemplate.from_template(prompt_template)
llm_chain = LLMChain(prompt=_chat_prompt, llm=llm)
# Flask application object; template_folder is set to "./".
# `Flask` is the same class as `flask.Flask` (both are imported at the top of
# this file).
app = Flask(__name__, template_folder="./")
def home_api():
    """Return a fixed deployment-success message.

    If the request body is a JSON object containing ``user_question``, that
    value is printed to stdout. A missing or non-JSON body is tolerated so
    the endpoint can be called as a plain status check.

    Returns:
        dict: ``{"Message": "Flask Home API Deploy Success on HF"}``.
    """
    # NOTE(review): no @app.route decorator is visible on this function in
    # this view; it must be registered with `app` to be reachable -- confirm.
    # silent=True makes get_json() return None instead of raising when the
    # body is absent or is not valid JSON.
    payload = request.get_json(silent=True)
    if isinstance(payload, dict) and 'user_question' in payload:
        print(payload['user_question'])
    return {"Message":"Flask Home API Deploy Success on HF"}
def chat():
    """Answer a user question with the module-level ``llm_chain``.

    Expects a JSON object body with a non-empty string ``user_question``.
    The question is passed to ``llm_chain.run`` and the result is returned
    as JSON.

    Returns:
        A JSON response ``{"response": <model output>}`` on success, or
        ``({"error": <message>}, 400)`` when the body is not a JSON object
        or ``user_question`` is missing, not a string, or blank.
    """
    # NOTE(review): no @app.route decorator is visible on this function in
    # this view; it must be registered with `app` to be reachable -- confirm.
    # Author's note: this view is kept synchronous; `async def` was reported
    # as not supported in this deployment.

    # silent=True yields None (instead of raising) for a missing/invalid
    # JSON body, so every malformed request takes the same 400 path below.
    payload = request.get_json(silent=True)
    user_query = payload.get('user_question') if isinstance(payload, dict) else None
    if not isinstance(user_query, str) or not user_query.strip():
        return jsonify({'error': "JSON body must contain a non-empty string 'user_question'"}), 400

    print("API Input:"+user_query)
    initial_response = llm_chain.run(user_query)
    print("API Call Output:"+initial_response)

    # The result must be returned as JSON: per the author's note, returning
    # the raw string made the client fail with
    # "JSONDecodeError: Expecting value: line 1 column 1 (char 0)".
    return jsonify({'response': initial_response})
if __name__ == '__main__':
    # Start Flask's built-in server on all addresses (0.0.0.0), port 7860.
    # An environment-driven variant the author tried and left disabled:
    #   app.run(host='0.0.0.0', port=int(os.environ.get('PORT', 7860)))
    app.run(port=7860, host='0.0.0.0')

# Startup output recorded by the author for the configuration above:
#   * Serving Flask app 'app'
#   * Debug mode: off
#   WARNING: This is a development server. Do not use it in a production
#   deployment. Use a production WSGI server instead.
#   * Running on all addresses (0.0.0.0)
#   * Running on http://127.0.0.1:7860
#   * Running on http://10.19.10.85:7860
#
# A Web Server Gateway Interface (WSGI) server runs Python code to create a
# web application. Background reading:
#   https://wsgi.readthedocs.io/en/latest/what.html
#   https://www.fullstackpython.com/wsgi-servers.html
#   https://en.wikipedia.org/wiki/Web_Server_Gateway_Interface