# File-viewer residue kept for reference (not part of the program):
# File size: 2,584 Bytes | c68fdcf c4deb4b | line-number gutter 1-82
import os
import re
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFaceHub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.document_loaders import TextLoader
from langchain.chains import RetrievalQA
# Generation settings handed to HuggingFaceHub through ``model_kwargs``.
_llm_model_kwargs = {"temperature": 0.75, "max_length": 500}

# Language model client for the TinyLlama chat checkpoint; it is the ``llm``
# that the RetrievalQA chain further down in this file is built on.
llm = HuggingFaceHub(
    repo_id="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    model_kwargs=_llm_model_kwargs,
)
def get_links(path: str = "data/links.txt") -> dict:
    """Load the place-name -> link table from a text file.

    Each useful line has the form ``<place>: <link>``. A line is split at
    its first colon only, so colons inside the link (``https://...``) are
    preserved. Surrounding whitespace is stripped from both parts.

    Lines without a colon are ignored. Lines whose place name is empty are
    ignored as well: an empty name cannot meaningfully be searched for in a
    text. If the same place name appears on several lines, the last one wins.

    Args:
        path: Location of the UTF-8 encoded links file. Defaults to
            ``data/links.txt``.

    Returns:
        A dict mapping each place name to its link, in file order.

    Raises:
        OSError: If the file cannot be opened.
    """
    places_template = {}
    with open(path, "r", encoding="utf-8") as file:
        for line in file:
            place, colon, link = line.partition(":")
            place = place.strip()
            # ``colon`` is empty when the line contains no ":" at all.
            if not colon or not place:
                continue
            places_template[place] = link.strip()
    return places_template
def find_places_and_links(text, places):
    """Return the places from ``places`` that are mentioned in ``text``.

    A place counts as mentioned when its name occurs in ``text`` as a whole
    phrase, compared case-insensitively. The name is matched literally:
    regex metacharacters in it (``+``, ``(``, ``.`` ...) have no special
    meaning.

    Args:
        text: The text to search.
        places: Mapping of place name to link.

    Returns:
        A dict with the ``place: link`` pairs of every mentioned place, in
        the iteration order of ``places``. Empty place names are skipped.
    """
    results = {}
    for place, link in places.items():
        if not place:
            # An empty name would match at almost any position.
            continue
        # Lookarounds instead of ``\b``: they behave the same for names that
        # start and end with a word character, and still anchor correctly
        # when the name starts or ends with punctuation such as ")" or "+".
        pattern = re.compile(
            rf"(?<!\w){re.escape(place)}(?!\w)", flags=re.IGNORECASE
        )
        if pattern.search(text):
            results[place] = link
    return results
# Text file whose contents are indexed for retrieval.
reviews_file_path = "data/data.txt"

# The file's lines as a plain list; not referenced again in this part of
# the file.
with open(reviews_file_path, "r", encoding="utf-8") as file:
    reviews = file.read().splitlines()

# Read the file with the same explicit encoding as the ``open`` call above,
# instead of relying on the platform default.
loader = TextLoader(reviews_file_path, encoding="utf-8")
# NOTE(review): ``load_and_split`` already returns split documents, and they
# are split again by ``text_splitter`` below -- confirm whether plain
# ``loader.load()`` was intended.
pages = loader.load_and_split()

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=511,
    chunk_overlap=100,
    # The third entry is a raw string so the backslash is passed through
    # unchanged without an invalid-escape warning.
    # NOTE(review): whether the splitter treats it as a regex or as literal
    # text depends on the installed LangChain version -- confirm.
    separators=['\n\n', '\n', r'(?<=\. )', ' ', ''],
)
docs = text_splitter.split_documents(pages)

# Embed the chunks and index them in FAISS, then expose the index as the
# retriever of a "stuff" RetrievalQA chain driven by ``llm``.
embeddings = HuggingFaceEmbeddings()
vectorstore = FAISS.from_documents(docs, embeddings)
retriever = vectorstore.as_retriever()
chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)
# Labels at which the generated text is cut off; everything from the
# earliest one onwards is discarded.
_ANSWER_CUTOFF_MARKERS = (
    "Question:",
    "Answer:",
    "Helpful Answer:",
    "Recommended Restaurant:",
)


def _cut_at_first_marker(text):
    """Return ``text`` truncated before its earliest cut-off marker.

    ``str.find`` reports a miss as -1, which is truthy, so the positions are
    compared explicitly rather than chained with ``or``. If no marker occurs,
    or if cutting would leave nothing (a marker at the very start), ``text``
    is returned unchanged.
    """
    positions = [
        pos for pos in map(text.find, _ANSWER_CUTOFF_MARKERS) if pos != -1
    ]
    if not positions:
        return text
    return text[:min(positions)].strip() or text


def get_response_from_model(question: str) -> str:
    """Answer ``question`` with the retrieval chain and append place links.

    The chain's ``"result"`` text is cut at the earliest of the markers in
    ``_ANSWER_CUTOFF_MARKERS``. Places from ``get_links()`` that are
    mentioned in the remaining answer are then listed below it.

    Args:
        question: The user's question, passed to the chain as ``"query"``.

    Returns:
        The answer text. When at least one known place is mentioned, it is
        followed by a ``Location:`` line and one ``<place>: <link>`` line
        per place, each on its own line.
    """
    generated_text = chain({"query": question})["result"]
    answer = _cut_at_first_marker(generated_text)

    links = find_places_and_links(answer, get_links())
    if not links:
        return answer

    # Join the answer and the listing with newlines so "Location:" does not
    # run straight on from the last word of the answer.
    output_lines = [answer, "Location:"]
    output_lines.extend(f"{place}: {link}" for place, link in links.items())
    return "\n".join(output_lines)