| from pathlib import Path |
|
|
| from .configs import DATA_DIR, INDEX_DIR, EMBEDDING_MODEL, CHUNK_SIZE, CHUNK_OVERLAP |
|
|
|
|
|
|
|
|
| |
|
|
def get_data():
    """Collect every ``.txt`` file found under ``DATA_DIR`` into column lists.

    ``DATA_DIR`` is searched recursively. Directories whose name happens to
    end in ``.txt`` are skipped, and files are visited in sorted path order
    so the result is reproducible from one run to the next.

    Returns:
        A dict with three parallel lists, one entry per file:
        ``"File Name"`` (the file's base name), ``"Context"`` (the file's
        parent directory relative to ``DATA_DIR``; ``"."`` for files that sit
        directly in it) and ``"Content"`` (the file's text decoded as UTF-8).

    Raises:
        UnicodeDecodeError: If a matched file is not valid UTF-8.
    """
    data = {
        "File Name": [],
        "Context": [],
        "Content": [],
    }

    # rglob() makes no ordering promise; sorting keeps the rows stable.
    for file_path in sorted(DATA_DIR.rglob("*.txt")):
        # The pattern also matches directories named "*.txt"; reading one
        # would raise, so only regular files are loaded.
        if not file_path.is_file():
            continue

        content = file_path.read_text(encoding="utf-8")
        context = file_path.parent.relative_to(DATA_DIR)

        data["File Name"].append(file_path.name)
        data["Context"].append(str(context))
        data["Content"].append(content)

        print(f"Read file: {context}/{file_path.name} ({len(content)})")

    return data
|
|
|
|
def build_embeddings():
    """Instantiate the HuggingFace embeddings model.

    Placeholder: the body has not been written yet, so every call raises
    ``NotImplementedError``.
    """
    raise NotImplementedError()
|
|
|
|
| import os |
| from langchain_groq import ChatGroq |
| from langchain_huggingface import HuggingFaceEmbeddings |
| from langchain_community.vectorstores import FAISS |
| from langchain_text_splitters import CharacterTextSplitter |
| from langchain.chains import RetrievalQA |
| from langchain_community.document_loaders import TextLoader |
|
|
| |
| |
|
|
| |
| |
# NOTE(review): everything below runs at import time. Importing this module
# instantiates the embedding model, reads "dados.txt" relative to the current
# working directory, builds a FAISS index and sends one query through
# ChatGroq. Consider moving it behind an entry point if this module is also
# imported for its functions.

# Embedding model used to vectorize the document chunks.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Decode explicitly as UTF-8 instead of relying on the platform's default
# encoding, so accented text loads the same way on every OS.
loader = TextLoader("dados.txt", encoding="utf-8")
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
docs = text_splitter.split_documents(documents)

# Vector index over the chunks.
vectorstore = FAISS.from_documents(docs, embeddings)

# temperature=0 is passed to the chat model for the answer generation.
llm = ChatGroq(
    model_name="llama-3.3-70b-versatile",
    temperature=0,
)

# Retrieval-augmented QA chain: the retriever feeds chunks to the LLM using
# the "stuff" chain type.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(),
)

# Run a single summary question and print the chain's "result" field.
pergunta = "Resuma os pontos principais do documento."
resposta = qa_chain.invoke(pergunta)
print(f"Resposta do Groq:\n{resposta['result']}")