Spaces:
Runtime error
Runtime error
File size: 3,496 Bytes
1498bb2 2faf40a 1498bb2 ac2e056 1498bb2 ac2e056 1498bb2 0bd4a2f 1498bb2 a04011c 8dcace6 5f25add 8dcace6 c660332 9eac99f c660332 9eac99f 748f54e 1498bb2 0bd4a2f 8dcace6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
import flask
from flask import Flask, request, jsonify
from langchain import PromptTemplate, LLMChain
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain import HuggingFaceHub
import os
from dotenv import load_dotenv
load_dotenv()
from huggingface_hub import InferenceClient
from langchain import HuggingFaceHub
import requests
import uuid
import sys
# Credentials and model repository ids are read from the environment.
hf_token = os.environ.get('HUGGINGFACEHUB_API_TOKEN')
starchat_repo_id = os.environ.get('starchat_repo_id')
llama2_repo_id = os.environ.get('llama2_repo_id')

# Generation settings currently tuned for Llama2.
# To switch to StarChat, the earlier configuration used
# repo_id=starchat_repo_id, "min_length": 512 and "max_new_tokens": 3072.
llama2_generation_kwargs = {
    "min_length": 1024,
    "max_new_tokens": 5632,
    "do_sample": True,
    "temperature": 0.1,
    "top_k": 50,
    "top_p": 0.95,
    # NOTE(review): this id was shared with the StarChat setup; confirm it is
    # a meaningful end-of-sequence token id for the Llama2 model as well.
    "eos_token_id": 49155,
}

# Hosted LLM handle used by the chain that answers chat requests.
llm = HuggingFaceHub(
    repo_id=llama2_repo_id,
    huggingfacehub_api_token=hf_token,
    model_kwargs=llama2_generation_kwargs,
)
# Llama2-style prompt: a system instruction followed by the user's question
# wrapped in [INST] ... [/INST]; {user_question} is filled in per request.
prompt_template = """
<<SYS>>You are a helpful, respectful and honest assistant. If you don't know the answer to a question, please don't share false information.In each conversation, question is placed after [INST] while your answer should be placed after [/INST].<</SYS>>
[INST] {user_question} [/INST]
assistant:
"""

# Build the prompt object first, then bind it to the LLM in a single chain.
chat_prompt = PromptTemplate.from_template(prompt_template)
llm_chain = LLMChain(llm=llm, prompt=chat_prompt)
# Flask application object; templates are looked up in the current directory.
app = Flask(__name__, template_folder="./")
@app.route('/', methods=['POST'])
def home_api():
    """Deployment check endpoint.

    Expects a JSON object containing a ``user_question`` key, prints that
    value, and returns a fixed success message.

    Returns:
        The success message as JSON, or a JSON error with HTTP 400 when the
        body is not a JSON object or lacks ``user_question``.
    """
    # silent=True yields None instead of raising on a missing/invalid JSON
    # body, so malformed requests get a clean 400 rather than a 500.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or 'user_question' not in data:
        return jsonify(
            {"error": "Request body must be a JSON object with a 'user_question' field."}
        ), 400

    user_query = data['user_question']
    print(user_query)
    return {"Message":"Flask Home API Deploy Success on HF"}
@app.route('/api/chat', methods=['POST'])
def chat():
    """Answer a user question with the LLM chain.

    Expects a JSON object of the form ``{"user_question": "<text>"}`` and
    runs the text through ``llm_chain``.

    Returns:
        ``{"response": <model output>}`` as JSON on success.
        A JSON ``{"error": ...}`` with HTTP 400 when the body is not a JSON
        object or ``user_question`` is missing, not a string, or blank.
        A JSON ``{"error": ...}`` with HTTP 500 when the chain call raises.
    """
    # The view stays synchronous: an `async def` version was tried and is
    # not supported in this setup.

    # silent=True yields None instead of raising on a missing/invalid JSON
    # body, so malformed requests get a clean 400 rather than a 500.
    data = request.get_json(silent=True)
    user_query = data.get('user_question') if isinstance(data, dict) else None
    if not isinstance(user_query, str) or not user_query.strip():
        return jsonify(
            {'error': "Request body must be a JSON object with a non-empty string 'user_question' field."}
        ), 400

    print(f"API Input:{user_query}")

    try:
        initial_response = llm_chain.run(user_query)
    except Exception:
        # Route boundary: record the traceback and still answer with JSON,
        # because clients of this endpoint decode every reply as JSON.
        app.logger.exception("LLM chain call failed")
        return jsonify({'error': 'The language model request failed.'}), 500

    print(f"API Call Output:{initial_response}")

    # The reply must be JSON: returning the bare string made clients fail
    # with "JSONDecodeError: Expecting value: line 1 column 1 (char 0)".
    return jsonify({'response': initial_response})
if __name__ == '__main__':
    # Listen on every interface at the fixed port 7860.
    # (A previously tried variant read the port from the PORT environment
    # variable, falling back to 7860.)
    server_options = {"host": '0.0.0.0', "port": 7860}
    app.run(**server_options)

# Sample startup output from a working run:
#  * Serving Flask app 'app'
#  * Debug mode: off
# WARNING: This is a development server. Do not use it in a production
# deployment. Use a production WSGI server instead.
#  * Running on all addresses (0.0.0.0)
#  * Running on http://127.0.0.1:7860
#  * Running on http://10.19.10.85:7860
#
# A Web Server Gateway Interface (WSGI) server runs Python code to create a
# web application. Further reading:
# https://wsgi.readthedocs.io/en/latest/what.html
# https://www.fullstackpython.com/wsgi-servers.html
# https://en.wikipedia.org/wiki/Web_Server_Gateway_Interface