Spaces:
Runtime error
Runtime error
Commit ·
1498bb2
1
Parent(s): f0cf269
Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import flask
|
| 2 |
+
from flask import Flask, request, jsonify
|
| 3 |
+
from langchain import PromptTemplate, LLMChain
|
| 4 |
+
from langchain.chains import LLMChain
|
| 5 |
+
from langchain.prompts import PromptTemplate
|
| 6 |
+
from langchain.memory import ConversationBufferMemory
|
| 7 |
+
from langchain import HuggingFaceHub
|
| 8 |
+
import os
|
| 9 |
+
from dotenv import load_dotenv
|
| 10 |
+
load_dotenv()
|
| 11 |
+
from huggingface_hub import InferenceClient
|
| 12 |
+
from langchain import HuggingFaceHub
|
| 13 |
+
import requests
|
| 14 |
+
import uuid
|
| 15 |
+
import sys
|
| 16 |
+
|
| 17 |
+
# Credentials and model repository ids are read from the environment
# (load_dotenv() is called earlier in this file, so a local .env also works).
hf_token = os.getenv('HUGGINGFACEHUB_API_TOKEN')
starchat_repo_id = os.getenv('starchat_repo_id')
llama2_repo_id = os.getenv('llama2_repo_id')
# port = os.getenv('port')  # unused: the port is hard-coded at startup

# Hosted LLM client, currently configured for Llama2.
# StarChat alternative: repo_id=starchat_repo_id, "min_length": 512,
# "max_new_tokens": 3072 (other generation settings unchanged).
llm = HuggingFaceHub(
    repo_id=llama2_repo_id,
    huggingfacehub_api_token=hf_token,
    model_kwargs={
        "min_length": 1024,
        "max_new_tokens": 5632,
        "do_sample": True,
        "temperature": 0.1,
        "top_k": 50,
        "top_p": 0.95,
        "eos_token_id": 49155,
    },
)
|
| 32 |
+
# It seems that because iShare is not an HF Pro account, we hit "Runtime error: Scheduling failure: not enough hardware capacity" (when repo_id is set to llama2_repo_id)???
|
| 33 |
+
|
| 34 |
+
# Prompt sent to the model; the {user_question} placeholder is filled in with
# the text supplied by the caller.
prompt_template = """
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.
If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information. Here is the user question {user_question}.
Helpful AI Answer:
"""

# Chain that combines the prompt template above with the configured LLM.
llm_chain = LLMChain(llm=llm, prompt=PromptTemplate.from_template(prompt_template))

# Flask application object; its template folder is set to "./".
app = flask.Flask(__name__, template_folder="./")
#app = Flask(__name__)
|
| 44 |
+
@app.route('/', methods=['POST'])
def home_api():
    """Log the posted question and confirm that the service is deployed.

    Expects a JSON object body containing a ``user_question`` key.

    Returns:
        A JSON success message, or a JSON error with HTTP status 400 when
        the body is not a JSON object or has no ``user_question`` key.
    """
    # silent=True makes get_json() return None instead of raising on a
    # missing or malformed JSON body, so all bad input takes the 400 path
    # below rather than surfacing as an unhandled TypeError/KeyError (HTTP 500).
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or 'user_question' not in data:
        return {"error": "Request body must be a JSON object with a 'user_question' field."}, 400
    user_query = data['user_question']
    print(user_query)
    return {"Hey":"Flask Home API Deploy Success on HF"}
|
| 50 |
+
|
| 51 |
+
@app.route('/api/chat', methods=['POST'])
def chat():
    """Answer a user question by running it through the LLM chain.

    Expects a JSON object body containing a ``user_question`` key.

    Returns:
        ``{'response': <model output>}`` on success, or a JSON error with
        HTTP status 400 when the body is not a JSON object or has no
        ``user_question`` key.
    """
    # Kept as a synchronous view: an ``async def`` variant was tried and is
    # not supported in this setup.
    # silent=True makes get_json() return None instead of raising on a
    # missing or malformed JSON body, so all bad input takes the 400 path
    # below rather than surfacing as an unhandled TypeError/KeyError (HTTP 500).
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or 'user_question' not in data:
        return {"error": "Request body must be a JSON object with a 'user_question' field."}, 400
    user_query = data['user_question']
    initial_response = llm_chain.run(user_query)
    return {'response': initial_response}
|
| 61 |
+
|
| 62 |
+
if __name__ == "__main__":
    # Listen on every interface at a fixed port (7860 is presumably the port
    # the hosting platform expects -- confirm before changing).
    server_options = {"host": '0.0.0.0', "port": 7860}
    app.run(**server_options)
|