Spaces:

binqiangliu
/

flask_inference_api

Runtime error

File size: 3,496 Bytes

1498bb2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2faf40a
 
 
1498bb2
 
 
 
 
 
 
 
 
 
 
ac2e056
1498bb2
 
 
ac2e056
1498bb2
 
0bd4a2f
1498bb2
a04011c
8dcace6
5f25add
8dcace6
 
c660332
9eac99f
c660332
9eac99f
 
748f54e
 
 
1498bb2
 
 
 
 
0bd4a2f
 
 
 
 
 
 
8dcace6

import flask
from flask import Flask, request, jsonify
from langchain import PromptTemplate, LLMChain
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain import HuggingFaceHub
import os
from dotenv import load_dotenv
load_dotenv()
from huggingface_hub import InferenceClient
from langchain import HuggingFaceHub
import requests
import uuid
import sys

# Credentials and model repository ids are read from the environment
# (load_dotenv() has already run at import time).
hf_token = os.environ.get('HUGGINGFACEHUB_API_TOKEN')
starchat_repo_id = os.environ.get('starchat_repo_id')
llama2_repo_id = os.environ.get('llama2_repo_id')

# Generation settings currently selected for Llama 2. The values previously
# noted for StarChat were min_length=512 and max_new_tokens=3072.
generation_kwargs = dict(
    min_length=1024,
    max_new_tokens=5632,
    do_sample=True,
    temperature=0.1,
    top_k=50,
    top_p=0.95,
    # NOTE(review): confirm this end-of-sequence token id matches the
    # Llama 2 repository actually configured in llama2_repo_id.
    eos_token_id=49155,
)

# The hosted model is Llama 2; pass starchat_repo_id as repo_id instead to
# switch to StarChat.
llm = HuggingFaceHub(
    repo_id=llama2_repo_id,
    huggingfacehub_api_token=hf_token,
    model_kwargs=generation_kwargs,
)

# Prompt with a system section followed by the user's question, which is
# substituted for {user_question}.
prompt_template = """
<<SYS>>You are a helpful, respectful and honest assistant. If you don't know the answer to a question, please don't share false information.In each conversation, question is placed after [INST] while your answer should be placed after [/INST].<</SYS>>
[INST] {user_question} [/INST]
assistant:
"""

chat_prompt = PromptTemplate.from_template(prompt_template)
llm_chain = LLMChain(prompt=chat_prompt, llm=llm)

# Flask application object; templates (if any) are looked up in the
# current directory.
app = Flask(__name__, template_folder="./")
@app.route('/', methods=['POST'])
def home_api():
    """Root endpoint: print the submitted question and report success.

    Expects a JSON object body containing a ``user_question`` field. The
    value is printed to stdout and otherwise unused.

    Returns:
        A dict (serialized to JSON by Flask) with a fixed success message,
        or a JSON error payload with HTTP status 400 when the body is not
        a JSON object or has no ``user_question`` field.
    """
    # silent=True yields None instead of raising on a missing or malformed
    # JSON body, so every bad request gets the same JSON error below rather
    # than an HTML error page.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or 'user_question' not in data:
        return jsonify(
            {'error': "Request body must be a JSON object with a 'user_question' field."}
        ), 400

    print(data['user_question'])
    return {"Message": "Flask Home API Deploy Success on HF"}

@app.route('/api/chat', methods=['POST'])
def chat():
    """Answer a user question by running it through the module-level LLM chain.

    Expects a JSON object body of the form ``{"user_question": "<text>"}``.
    The view is kept synchronous; per the original author's note, declaring
    it ``async def`` was not supported in this deployment.

    Returns:
        A JSON response ``{"response": <model output>}`` on success.
        A JSON ``{"error": ...}`` payload with status 400 when the body is
        not a JSON object or ``user_question`` is missing or not a string,
        and with status 502 when the call to the model fails.

    Every outcome is returned as JSON: the original author recorded that
    returning the bare string caused a JSONDecodeError for the caller.
    """
    # silent=True yields None instead of raising on a missing or malformed
    # JSON body, so the client always receives a JSON error payload.
    data = request.get_json(silent=True)
    user_query = data.get('user_question') if isinstance(data, dict) else None
    if not isinstance(user_query, str):
        return jsonify(
            {'error': "Request body must be a JSON object with a string 'user_question' field."}
        ), 400

    print("API Input:"+user_query)
    try:
        initial_response = llm_chain.run(user_query)
    except Exception:
        # Request boundary: log the full traceback and answer with JSON so
        # callers that decode the body as JSON do not choke on an HTML
        # error page.
        app.logger.exception("LLM chain call failed")
        return jsonify({'error': 'The language model request failed.'}), 502
    print("API Call Output:"+initial_response)

    return jsonify({'response': initial_response})
     
if __name__ == "__main__":
    # Start Flask's built-in server on all interfaces at the fixed port 7860.
    # A commented-out earlier variant read the port from the PORT environment
    # variable (defaulting to 7860).
    app.run(port=7860, host="0.0.0.0")
# Verified working. Sample startup output from the Flask development server:
#
#  * Serving Flask app 'app'
#  * Debug mode: off
# WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.
#  * Running on all addresses (0.0.0.0)
#  * Running on http://127.0.0.1:7860
#  * Running on http://10.19.10.85:7860
#
# A Web Server Gateway Interface (WSGI) server runs Python code to create a web application.
# Further reading:
# https://wsgi.readthedocs.io/en/latest/what.html
# https://www.fullstackpython.com/wsgi-servers.html
# https://en.wikipedia.org/wiki/Web_Server_Gateway_Interface