File size: 2,953 Bytes
19fd61c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9c80adf
 
19fd61c
 
7437646
 
19fd61c
5377385
 
 
4b4c2a9
19fd61c
 
 
b7f5046
19fd61c
 
2a5ea04
 
 
19fd61c
 
 
 
 
 
 
 
 
 
ad35c80
19fd61c
 
f6fc821
 
19fd61c
 
 
 
ac32b30
3f71b71
ac32b30
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import flask
from flask import Flask, request, jsonify
from langchain import PromptTemplate, LLMChain
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain import HuggingFaceHub
import os
from dotenv import load_dotenv
load_dotenv()
from huggingface_hub import InferenceClient
from langchain import HuggingFaceHub
import requests
import uuid
import sys

# Credentials and model repository ids are read from the environment
# (populated from a .env file by the load_dotenv() call at import time).
hf_token = os.getenv('HUGGINGFACEHUB_API_TOKEN')
starchat_repo_id = os.getenv('starchat_repo_id')
llama2_repo_id = os.getenv('llama2_repo_id')

# Hosted-inference LLM. Currently wired to the Llama2 repo; to switch to
# StarChat, pass repo_id=starchat_repo_id and use min_length=512 and
# max_new_tokens=3072 instead of the Llama2 values below.
# NOTE(review): eos_token_id=49155 looks like a leftover from the StarChat
# configuration -- confirm it is a valid end-of-sequence id for the Llama2 repo.
llm = HuggingFaceHub(
    repo_id=llama2_repo_id,
    huggingfacehub_api_token=hf_token,
    model_kwargs=dict(
        min_length=1024,
        max_new_tokens=5632,
        do_sample=True,
        temperature=0.1,
        top_k=50,
        top_p=0.95,
        eos_token_id=49155,
    ),
)
# Observed with llama2_repo_id: "Runtime error: Scheduling failure: not enough
# hardware capacity". Presumably this happens because the iShare account is
# not a Hugging Face Pro account -- unconfirmed.

# Prompt sent to the model; {user_question} is filled in per request.
prompt_template = """
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.
If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information. Here is the user question {user_question}.
Helpful AI Answer:
"""

# Single-step chain: render the prompt with the user question, then call the LLM.
llm_chain = LLMChain(
    prompt=PromptTemplate.from_template(prompt_template),
    llm=llm,
)

# Flask application object; the route handlers below register against it.
app = Flask(__name__)
@app.route('/', methods=['POST'])
def home_api():
    """Health-check style endpoint: log the posted question and confirm deployment.

    Expects a JSON object body containing a ``user_question`` field. The
    question is printed to stdout and a fixed success message is returned.

    Returns:
        A JSON success message, or a JSON error with HTTP 400 when the body
        is missing, is not a JSON object, or lacks ``user_question``.
    """
    # silent=True yields None instead of raising on a missing/invalid JSON
    # body, so every malformed request goes through the same 400 path below
    # rather than surfacing as a TypeError/KeyError (HTTP 500).
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or 'user_question' not in data:
        return jsonify({
            'error': "Request body must be a JSON object with a 'user_question' field."
        }), 400

    user_query = data['user_question']
    print(user_query)
    return {"Message": "Flask Home API Deploy Success on HF"}

@app.route('/api/chat', methods=['POST'])
def chat():
    """Answer a user question by running it through the LLM chain.

    Expects a JSON object body containing a ``user_question`` field.

    Returns:
        ``{"response": <model output>}`` on success; a JSON error with HTTP
        400 when the body is missing, is not a JSON object, or lacks
        ``user_question``; a JSON error with HTTP 502 when the chain call
        raises.
    """
    # NOTE: an ``async def`` version of this view was tried and failed in this
    # deployment; the handler is kept synchronous.
    # NOTE: jsonify was once suspected of not working on Hugging Face, but it
    # was re-tested and works there.

    # silent=True yields None instead of raising on a missing/invalid JSON
    # body, so malformed requests get a clean 400 instead of an HTTP 500.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or 'user_question' not in data:
        return jsonify({
            'error': "Request body must be a JSON object with a 'user_question' field."
        }), 400

    user_query = data['user_question']
    try:
        initial_response = llm_chain.run(user_query)
    except Exception:
        # Boundary handler: the remote inference call can fail for reasons
        # outside this service (e.g. capacity errors). Log the traceback and
        # return a structured error rather than an opaque 500 page.
        app.logger.exception("LLM chain failed for /api/chat request")
        return jsonify({
            'error': 'The language model backend failed to produce a response.'
        }), 502

    return jsonify({'response': initial_response})