import gradio as gr
from huggingface_hub import InferenceClient
from huggingface_hub import login
import re
import pandas as pd
from langchain.schema import Document
from langchain.text_splitter import TokenTextSplitter
from transformers import AutoTokenizer
import copy
from langchain_community.retrievers import BM25Retriever
from langchain_huggingface.llms.huggingface_endpoint import HuggingFaceEndpoint
"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
df1 = pd.read_csv("./data/champions_data_lol.csv")
df1['Story'] = df1['Story'].astype(str)
# Pre-processing
def preprocess_for_bm25(text):
    """Normalize *text* for BM25 matching.

    Pads punctuation with spaces so it tokenizes as separate terms,
    preserves literal ellipses ("..."), collapses whitespace runs,
    and lowercases the result.
    """
    # Shield ellipses so the punctuation pass below can't split them apart.
    shielded = text.replace("...", " _ELLIPSIS_ ")
    # Surround every punctuation mark with spaces (the placeholder contains none of them).
    shielded = re.sub(r'([.,!?()"\'])', r' \1 ', shielded)
    # Bring the ellipses back.
    restored = shielded.replace("_ELLIPSIS_", "...")
    # Collapse whitespace, trim the ends, lowercase.
    return re.sub(r'\s+', ' ', restored).strip().lower()
"""Pre-processing"""
# Convert DataFrame to documents
documents = []
for _, row in df1.iterrows():
biography_text = row['Story']
documents.append(Document(
page_content= biography_text, # Text of the chunk
metadata= {
'champion_name': row['Champion'],
'role': row['Role']}
))
"""Chunking"""
# Specify the model name
EMBEDDING_MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
tokenizer_name = EMBEDDING_MODEL_NAME
# Token splitting for more context split
text_splitter = TokenTextSplitter.from_huggingface_tokenizer(
tokenizer=AutoTokenizer.from_pretrained(tokenizer_name),
chunk_size=150,
chunk_overlap=15
)
chunks = text_splitter.split_documents(documents) # chunks used for LLM generation
chunks_bm25 = copy.deepcopy(chunks) # Creates an independent copy, chunks used for BM25 retriever
for i, doc in enumerate(chunks_bm25):
doc.page_content = preprocess_for_bm25(doc.page_content) # Modify page_content in place
doc.metadata["index"] = i # Add an index for tracking
for i, doc in enumerate(chunks):
doc.metadata["index"] = i # Add an index for tracking
"""Retriever"""
bm25_retriever = BM25Retriever.from_documents(chunks_bm25, k = 4) # 2 most similar contexts
def retriever(query):
    """Return the generation-side chunks matching the BM25 hits for *query*.

    The query is normalized the same way the BM25 corpus was, then each
    hit's "index" metadata maps back to the original (unnormalized) chunk.
    """
    hits = bm25_retriever.invoke(preprocess_for_bm25(query))
    return [chunks[hit.metadata['index']] for hit in hits]
"""Chain"""
#from langchain_core.runnables.passthrough import RunnablePassthrough
#from langchain.prompts import ChatPromptTemplate
#from langchain_core.output_parsers.string import StrOutputParser
from langchain_community.llms.huggingface_hub import HuggingFaceHub
#import os
#from langchain_core.runnables import RunnableLambda
#prompt_template = ChatPromptTemplate.from_template(prompt)
"""llm = HuggingFaceHub(
repo_id="HuggingFaceH4/zephyr-7b-beta",
#repo_id="google-bert/bert-base-uncased",
model_kwargs={
"temperature": 0.1,
"max_length": 5,
"return_full_text": False
}
"""
# Set the correct endpoint and task for the model
def ra(user_question):
    """Rewrite *user_question* for grammar and clarity via the chat model.

    Returns the corrected question as a plain string. Generation stops at
    '(' to cut off parenthetical explanations the model sometimes appends,
    which can leave a trailing '(' for the caller to strip.
    """
    messages_q = [
        {"role": "system", "content": "You are familiar with League of Legends lore. You help correct grammar and clarity without giving additional explanations."},
        {"role": "user", "content": f"Fix any grammar or clarity issues in the following question. Only return the corrected question itself.\n\n{user_question}"}
    ]
    print(messages_q)
    res = client.chat_completion(
        messages_q,
        max_tokens=30,
        stream=False,
        temperature=0.1,
        stop=['(']
    )
    # The content is already a plain str; the original copy.deepcopy() was a no-op.
    return res["choices"][0]["message"]["content"]
# chain = RunnablePassthrough() | RunnableLambda(ra) | prompt_template | client.chat_completion() | StrOutputParser() for notebook
"""-------------------------------------------------------------------"""
def respond(
    message,
    history: list[tuple[str, str]],
    max_tokens,
    temperature,
    top_p,
):
    """Gradio ChatInterface callback: stream a RAG answer over LoL lore.

    Parameters
    ----------
    message : str
        The user's latest question.
    history : list[tuple[str, str]]
        Prior (user, assistant) turns supplied by Gradio.
    max_tokens, temperature, top_p :
        Passed by Gradio's ChatInterface; generation currently uses fixed
        values (max_tokens=200, temperature=0.1) instead.

    Yields the growing answer string so Gradio can render it token by token.
    """
    # First pass the raw question through the grammar-correction model.
    new_query = ra(message)
    print("old: ", new_query)
    # ra() stops generation at '(', which can leave a trailing '(' — strip it.
    # Guard against an empty rewrite so new_query[-1] cannot raise IndexError.
    if new_query and new_query[-1] == "(":
        new_query = new_query[:-1]
    print("new: ", new_query)
    # NOTE(review): this prompt is printed for debugging but not yet sent to
    # the model; the system turn below uses a shorter hard-coded message.
    system_message = f"""
You are an expert in League of Legends (LoL) lore. You will only answer questions related to the champions and their stories within the game.
Instructions:
1. Only use the context provided below to answer the question. Reference the context directly for accuracy.
2. If the question is outside the scope of League of Legends lore, respond: "Please ask something related to League of Legends lore."
3. If the provided context does not provide a clear answer, respond: "I'm unsure based on the provided context."
"""
    print(system_message)
    messages = [{"role": "system", "content": "You are an expert in League of Legends (LoL) lore. You will only answer questions related to the champions and their stories within the game."}]
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    # Retrieve the context once (the original called retriever() twice:
    # once for the prompt and once for the debug print).
    prompt = "Context: " + str(retriever(new_query)) + "\n\nQuestion: " + new_query + "\n\nAnswer: "
    messages.append({"role": "user", "content": prompt})
    print(prompt)
    response = ""
    # 'chunk' (not 'message') so the loop doesn't shadow the parameter.
    for chunk in client.chat_completion(
        messages,
        max_tokens=200,
        stream=True,
        temperature=0.1
    ):
        token = chunk.choices[0].delta.content
        # Streaming deltas can be None (e.g. the final chunk); skip those
        # instead of crashing on 'str + None'.
        if token:
            response += token
        yield response
"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
with gr.Blocks() as demo:
gr.Markdown("""
# League of Legends Lore Chatbot
Welcome to the **LoL Lore Chatbot**! 🏆
Here, you can ask questions about League of Legends champions and their stories.
**Example Question:**
*Why does Kayn have different forms?*
""")
chat = gr.ChatInterface(respond)
if __name__ == "__main__":
demo.launch()
|