| import os | |
| import re | |
| from langchain.embeddings import HuggingFaceEmbeddings | |
| from langchain.llms import HuggingFaceHub | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain.vectorstores import FAISS | |
| from langchain.document_loaders import TextLoader | |
| from langchain.chains import RetrievalQA | |
# Language model handed to the RetrievalQA chain built further down this file.
# The sampling temperature and length cap are passed through model_kwargs.
llm = HuggingFaceHub(
    model_kwargs={"temperature": 0.75, "max_length": 500},
    repo_id="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
)
def get_links(path: str = "data/links.txt") -> dict:
    """Load the place-name -> link mapping from a text file.

    Each useful line has the form ``<place>: <link>``. A line is split at its
    first colon only, so colons inside the link (e.g. ``https://...``) are
    kept. Lines without any colon are ignored.

    Args:
        path: File to read. Defaults to ``data/links.txt``, the location that
            was previously hard-coded, so existing no-argument calls behave
            exactly as before.

    Returns:
        A dict mapping each whitespace-stripped place name to its
        whitespace-stripped link. If a place name appears on several lines,
        the last one wins.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(path, "r", encoding="utf-8") as file:
        data = file.read()

    places_template = {}
    for line in data.strip().split('\n'):
        # partition() splits on the first ':' only; an empty separator in the
        # result means the line had no colon at all.
        place, colon, link = line.partition(':')
        if colon:
            places_template[place.strip()] = link.strip()
    return places_template
def find_places_and_links(text, places):
    """Return the entries of ``places`` whose name is mentioned in ``text``.

    A name counts as mentioned when it occurs in ``text`` (case-insensitively)
    and is not directly preceded or followed by a word character, so
    ``"Pizza"`` is not found inside ``"pizzahut"``.

    Args:
        text: The text to scan.
        places: Mapping of place name -> link.

    Returns:
        A dict with the same name -> link pairs, restricted to the names found
        in ``text``, in the iteration order of ``places``.
    """
    results = {}
    for place, link in places.items():
        if not place:
            # An empty name would match almost any text; it carries no signal.
            continue
        # re.escape: the name is literal text, not a pattern, so characters
        # such as '+', '.', '(' must not be interpreted as regex syntax.
        # Lookarounds instead of \b: they act like \b for names that start/end
        # with a word character, and also work for names that start or end
        # with punctuation (where \b could never match).
        pattern = re.compile(
            rf'(?<!\w){re.escape(place)}(?!\w)', flags=re.IGNORECASE
        )
        if pattern.search(text):
            results[place] = link
    return results
# Build the retrieval pipeline once, at import time: load the reviews file,
# split it into chunks, embed the chunks into a FAISS index, and wrap the
# index plus `llm` in a RetrievalQA chain.
reviews_file_path = "data/data.txt"

# Raw review text, one list entry per line of the file.
with open(reviews_file_path, "r", encoding="utf-8") as file:
    reviews = file.read().splitlines()

# Pass the same explicit encoding as the open() above; without it the loader
# falls back to the platform default encoding.
loader = TextLoader(reviews_file_path, encoding="utf-8")
# load() rather than load_and_split(): the latter already chunks the text with
# LangChain's default splitter, and the documents are split again (with the
# settings that actually matter) just below.
pages = loader.load()

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=511,
    chunk_overlap=100,
    # The third separator is a regex lookbehind; it is a raw string so that
    # '\.' is not an invalid string escape (the value is unchanged).
    # NOTE(review): recent LangChain releases treat separators as literal text
    # unless is_separator_regex=True is passed, in which case this entry never
    # matches - confirm against the installed version.
    separators=['\n\n', '\n', r'(?<=\. )', ' ', ''],
)
docs = text_splitter.split_documents(pages)

embeddings = HuggingFaceEmbeddings()
vectorstore = FAISS.from_documents(docs, embeddings)
retriever = vectorstore.as_retriever()
chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)
# Labels at which the generated text is cut off: everything from the earliest
# one onward is discarded.
_ANSWER_END_MARKERS = (
    "Question:",
    "Answer:",
    "Helpful Answer:",
    "Recommended Restaurant:",
)


def _trim_at_marker(text, markers=_ANSWER_END_MARKERS):
    """Return ``text`` cut just before the earliest marker, then stripped.

    If no marker occurs, or a marker sits at the very start (where cutting
    would leave nothing), ``text`` is returned unchanged.
    """
    # str.find returns -1 for "not found", which is truthy, so the hits must
    # be filtered explicitly rather than chained with `or`.
    hits = [pos for pos in map(text.find, markers) if pos != -1]
    cut = min(hits, default=-1)
    if cut <= 0:
        return text
    return text[:cut].strip()


def get_response_from_model(question: str):
    """Answer ``question`` with the QA chain and append links for named places.

    The chain's ``"result"`` text is trimmed at the first end-of-answer marker.
    The trimmed answer is then scanned for known place names (loaded via
    ``get_links``); if any are found, a ``Location:`` section listing
    ``<place>: <link>`` lines is appended.

    Args:
        question: The user's question, passed to the chain as ``"query"``.

    Returns:
        The answer text, followed (only when places were found) by a newline,
        the line ``Location:`` and one ``<place>: <link>`` line per place.
    """
    generated_response = chain({"query": question})
    answer = _trim_at_marker(generated_response["result"])

    links = find_places_and_links(answer, get_links())
    if not links:
        return answer

    # Join everything with newlines so "Location:" starts on its own line
    # instead of being glued to the last word of the answer.
    output_lines = [answer, 'Location:']
    output_lines.extend(f'{place}: {link}' for place, link in links.items())
    return '\n'.join(output_lines)