File size: 2,584 Bytes
c68fdcf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c4deb4b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import os
import re
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFaceHub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.document_loaders import TextLoader
from langchain.chains import RetrievalQA

# Language model handed to the RetrievalQA chain built further down.
# The generation settings (temperature 0.75, max_length 500) are passed
# through unchanged as model_kwargs.
llm = HuggingFaceHub(
    repo_id="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    model_kwargs=dict(temperature=0.75, max_length=500),
)

def get_links(path="data/links.txt"):
    """Load the place-name -> link mapping from a ``name: link`` text file.

    Every line is split at its FIRST colon only, so links that themselves
    contain colons (``https://...``) are kept intact. Surrounding whitespace
    is removed from both the name and the link.

    Args:
        path: Location of the UTF-8 encoded links file. Defaults to the
            project's ``data/links.txt``.

    Returns:
        dict: Place name mapped to its link. Lines without a colon and lines
        whose name part is empty are ignored; when a name appears more than
        once the last occurrence wins. A line with a name but nothing after
        the colon maps that name to an empty string.
    """
    places_template = {}
    with open(path, "r", encoding="utf-8") as file:
        for line in file:
            place, colon, link = line.partition(":")
            place = place.strip()
            # No colon -> not a "name: link" entry. An empty name is skipped
            # too: it could never identify a place and would match any text
            # when later used as a search pattern.
            if not colon or not place:
                continue
            places_template[place] = link.strip()

    return places_template

def find_places_and_links(text, places):
    """Return the subset of *places* whose name is mentioned in *text*.

    Matching is case-insensitive and whole-name only: a name is not matched
    when it is merely part of a longer word (``Roma`` does not match
    ``Romans``).

    Args:
        text: Text to scan for place names.
        places: Mapping of place name to link.

    Returns:
        dict: ``{place: link}`` for every place found in *text*, in the
        iteration order of *places*.
    """
    results = {}

    for place, link in places.items():
        if not place:
            # An empty name would match at every position of any text.
            continue
        # The name is data, not a regex: escape it so characters such as
        # "+", "(" or "." are matched literally instead of raising re.error
        # or matching unintended text. Lookarounds are used instead of \b so
        # that names starting or ending with punctuation can still match.
        pattern = re.compile(
            rf'(?<!\w){re.escape(place)}(?!\w)', flags=re.IGNORECASE
        )
        if pattern.search(text):
            results[place] = link

    return results

# Text corpus that gets indexed for retrieval.
reviews_file_path = "data/data.txt"
# Raw lines of the corpus, kept as a module-level list.
with open(reviews_file_path, "r", encoding="utf-8") as file:
    reviews = file.read().splitlines()

# Load the same file as LangChain documents. The encoding is passed
# explicitly so it matches the UTF-8 read above instead of depending on the
# platform default.
loader = TextLoader(reviews_file_path, encoding="utf-8")
# NOTE(review): load_and_split() already runs a splitter before the custom
# one below is applied -- confirm whether a plain loader.load() was intended.
pages = loader.load_and_split()

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=511,
    chunk_overlap=100,
    # Raw string: "\." is not a valid escape in a normal string literal and
    # triggers a warning on newer Python versions; the runtime value is the
    # same.
    # NOTE(review): whether this entry is interpreted as a regex depends on
    # the installed LangChain version -- verify against the splitter's docs.
    separators=['\n\n', '\n', r'(?<=\. )', ' ', ''],
)
docs = text_splitter.split_documents(pages)

# Embed the chunks, index them in FAISS and wire the retriever and the LLM
# into a "stuff" RetrievalQA chain.
embeddings = HuggingFaceEmbeddings()
vectorstore = FAISS.from_documents(docs, embeddings)
retriever = vectorstore.as_retriever()
chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)

# Labels at which the generated text is cut off: everything from the first
# one onwards is discarded.
_STOP_MARKERS = ("Question:", "Answer:", "Helpful Answer:", "Recommended Restaurant:")


def _truncate_at_stop_marker(text):
    """Return *text* cut just before its earliest stop marker.

    A marker that opens the text (optionally after leading whitespace) is
    treated as a label for the answer rather than a cut point, so it never
    truncates the answer to nothing. When no cut point exists the text is
    returned unchanged; otherwise the kept prefix is stripped.
    """
    # Skip leading whitespace and, if present, one leading label.
    search_from = len(text) - len(text.lstrip())
    for marker in _STOP_MARKERS:
        if text.startswith(marker, search_from):
            search_from += len(marker)
            break

    # str.find() returns -1 for "not found", which is truthy; the results
    # therefore cannot be chained with `or` and are filtered explicitly.
    cut_points = [
        position
        for position in (text.find(marker, search_from) for marker in _STOP_MARKERS)
        if position != -1
    ]
    if not cut_points:
        return text
    return text[:min(cut_points)].strip()


def get_response_from_model(question: str):
    """Answer *question* with the retrieval chain and list links for places.

    The chain's ``"result"`` text is truncated at the first stop marker
    (``Question:``, ``Answer:``, ``Helpful Answer:``,
    ``Recommended Restaurant:``). Every known place mentioned in the
    remaining answer is then appended, one per line, under a ``Location:``
    heading.

    Args:
        question: The user's question, passed to the chain as ``query``.

    Returns:
        str: The answer text, followed by the ``Location:`` section on new
        lines when at least one known place was mentioned.
    """
    generated_response = chain({"query": question})
    answer = _truncate_at_stop_marker(generated_response["result"])

    links = find_places_and_links(answer, get_links())
    if not links:
        return answer

    location_lines = ['Location:']
    location_lines.extend(f'{place}: {link}' for place, link in links.items())
    # Separate the answer from the link section with a newline so the
    # heading does not run into the last word of the answer.
    return answer + '\n' + '\n'.join(location_lines)