File size: 1,389 Bytes
b03d027
70e88e4
aad91d6
70e88e4
b03d027
433bee9
70e88e4
97f53dd
70e88e4
 
 
b03d027
97f53dd
 
b03d027
70e88e4
 
 
 
 
 
 
 
 
97f53dd
b03d027
70e88e4
 
 
 
 
 
 
 
 
97f53dd
70e88e4
b03d027
97f53dd
 
70e88e4
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import os
from langchain.chains import RetrievalQA
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.llms import HuggingFaceEndpoint
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import TextLoader

# HF Spaces will inject token automatically
# Redirect Hugging Face model/cache directories to /tmp — presumably because
# the default home directory is read-only on the deployment host (e.g. an HF
# Space container); /tmp is the writable scratch area. TODO confirm host.
os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_cache"
os.environ["HF_HOME"] = "/tmp/hf_home"

def load_vectorstore(data_path: str = "app/data/analysis_summary.txt"):
    """Build a FAISS retriever over a plain-text document.

    Loads the file at *data_path*, splits it into overlapping character
    chunks, embeds the chunks with a MiniLM sentence-transformer, and
    indexes them in an in-memory FAISS store.

    Args:
        data_path: Path to the source text file. Defaults to the bundled
            analysis summary, so existing callers are unaffected.

    Returns:
        A LangChain retriever backed by the FAISS index.
    """
    embedder = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

    loader = TextLoader(data_path)
    documents = loader.load()

    # 1000-char chunks with 100-char overlap so sentences spanning a chunk
    # boundary still appear intact in at least one chunk.
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    chunks = splitter.split_documents(documents)

    vectorstore = FAISS.from_documents(chunks, embedder)
    return vectorstore.as_retriever()

def build_qa_chain():
    """Assemble the RetrievalQA chain that answers user questions.

    Wires a FLAN-T5 model served via the Hugging Face Inference endpoint to
    the FAISS retriever produced by ``load_vectorstore``.

    Returns:
        A ``RetrievalQA`` chain; source documents are not returned with the
        answer (``return_source_documents=False``).
    """
    # Pass generation parameters as explicit kwargs: current
    # HuggingFaceEndpoint versions validate temperature / max_new_tokens as
    # top-level fields and reject them inside model_kwargs.
    llm = HuggingFaceEndpoint(
        repo_id="google/flan-t5-base",
        temperature=0.3,
        max_new_tokens=512,
    )

    retriever = load_vectorstore()
    chain = RetrievalQA.from_chain_type(
        llm=llm,
        retriever=retriever,
        return_source_documents=False,
    )
    return chain

qa_chain = build_qa_chain()

def get_bot_answer(query: str) -> str:
    """Run *query* through the retrieval-QA chain and return the answer text.

    Any failure (network, model, retrieval) is caught and surfaced to the
    caller as a human-readable error string rather than an exception.
    """
    try:
        answer = qa_chain.run(query)
    except Exception as exc:  # best-effort: report the failure to the UI
        return f"❌ Error: {str(exc)}"
    return answer