|
|
|
|
|
from langchain.document_loaders import TextLoader |
|
|
from langchain.text_splitter import CharacterTextSplitter |
|
|
from langchain.embeddings import HuggingFaceEmbeddings |
|
|
from langchain.vectorstores import FAISS |
|
|
from langchain import HuggingFaceHub |
|
|
from langchain.chains.question_answering import load_qa_chain |
|
|
import streamlit as st |
|
|
# Input/output paths on Google Drive as mounted in Colab.
# NOTE(review): apparently intended for convert_to_utf8 below, but no call
# with these paths is visible in this chunk — confirm against the full file.
input_file="/content/drive/MyDrive/Python_Code/streamlit_01/T1.txt"


output_file="/content/drive/MyDrive/Python_Code/streamlit_01/T2.txt"


import os


# Hugging Face Hub token comes from Streamlit secrets (key "Key2");
# the langchain HuggingFaceHub client reads it from this env var.
os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets["Key2"]
|
|
|
|
|
|
|
|
def convert_to_utf8(input_file, output_file, input_encoding=None):
    """Re-encode a text file as UTF-8.

    Parameters
    ----------
    input_file : str
        Path of the file to read. Decoded with ``input_encoding``; when
        ``None`` the platform default is used (the original behavior, which
        is locale-dependent — pass the real source encoding when known).
    output_file : str
        Path the UTF-8 copy is written to.
    input_encoding : str | None
        New, backward-compatible parameter: explicit encoding of the source
        file. Defaults to ``None`` so existing callers are unchanged.

    Errors are reported via ``print`` instead of raised (best-effort, as in
    the original).
    """
    try:
        # Read the whole file; decoding problems surface here.
        with open(input_file, 'r', encoding=input_encoding) as file:
            text = file.read()

        # Write the same text back, explicitly encoded as UTF-8.
        with open(output_file, 'w', encoding='utf-8') as file:
            file.write(text)

        print(f"تبدیل فایل {input_file} به فرمت UTF-8 با موفقیت انجام شد و در فایل {output_file} ذخیره شد.")
    except Exception as e:
        # Deliberate best-effort: report the failure rather than crash.
        print(f"خطا در تبدیل فایل به فرمت UTF-8: {str(e)}")
|
|
|
|
|
|
|
|
|
|
|
def load_file(path='d2.txt'):
    """Load a plain-text file into a list of LangChain documents.

    Parameters
    ----------
    path : str
        File to load. Previously hard-coded to ``'d2.txt'``; now a
        parameter with that value as the default, so existing zero-argument
        callers behave exactly as before.

    Returns
    -------
    list
        The documents produced by ``TextLoader.load()``.
    """
    loader = TextLoader(path)
    documents = loader.load()
    return documents
|
|
|
|
|
# Module-level side effect: the source document is loaded once at import time.
documents=load_file()


# Splitting parameters: ~2000-character chunks with a 200-character overlap
# so answers that straddle a chunk boundary are still retrievable.
chunk_size=2000


chunk_overlap=200
|
|
def build_model(documents,chunk_size,chunk_overlap):
    """Chunk the documents, embed each chunk, and index them with FAISS.

    Parameters
    ----------
    documents : list
        Documents as returned by ``load_file``.
    chunk_size : int
        Maximum characters per chunk.
    chunk_overlap : int
        Characters shared between consecutive chunks.

    Returns
    -------
    FAISS
        A vector store supporting ``similarity_search``.
    """
    splitter = CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    chunks = splitter.split_documents(documents)

    # Default HuggingFace embedding model (no arguments given).
    embedder = HuggingFaceEmbeddings()

    # One FAISS entry per chunk; queries retrieve the nearest chunks.
    vector_index = FAISS.from_documents(chunks, embedder)
    return vector_index
|
|
|
|
|
# Module-level side effect: build the FAISS index once so every query reuses it.
db=build_model(documents,chunk_size,chunk_overlap)
|
|
|
|
|
|
|
|
def load_model(query,db):
    """Answer ``query`` with retrieval-augmented QA over the FAISS index ``db``.

    Parameters
    ----------
    query : str
        The user's question.
    db : FAISS
        Vector store built by ``build_model``.

    Returns
    -------
    str
        The chain's answer text.
    """
    # BUG FIX: the previous repo_id ("sentence-transformers/all-MiniLM-L6-v2")
    # is a sentence-*embedding* model that cannot generate text, so the QA
    # chain could not produce answers. Use a text2text-generation model
    # instead. TODO confirm model choice with the owner.
    llm = HuggingFaceHub(repo_id="google/flan-t5-base",
                         model_kwargs={"temperature": 0.1, "max_length": 400})
    # "stuff" chain type: all retrieved chunks are concatenated into one prompt.
    chain = load_qa_chain(llm, chain_type="stuff")
    # Retrieve the chunks most similar to the query.
    docs = db.similarity_search(query)
    out = chain.run(input_documents=docs, question=query)
    return out
|
|
|
|
|
|
|
|
def find_help(text):
    """Extract the model's answer from raw LLM output and display it.

    Searches ``text`` for the "Helpful Answer:" marker. Everything after the
    marker — up to a following "Question" marker when one occurs within the
    first 400 characters, otherwise capped at 400 characters — is printed to
    stdout and written to the Streamlit page. If the marker is absent, a
    (Persian) notice is printed instead.

    Fixes over the original: the redundant function-local
    ``import streamlit as st`` is removed (streamlit is already imported at
    module level), and the slice ``index_end + len("Question") - 8`` is
    simplified to ``index_end`` (``len("Question") == 8``, so the values are
    identical); the slice is also computed once instead of twice.
    """
    marker = "Helpful Answer:"
    index_help = text.find(marker)

    if index_help == -1:
        # Marker absent — original Persian message kept verbatim.
        print("کلمه 'help' در متن یافت نشد.")
        return

    content_after_help = text[index_help + len(marker):]

    # A "Question" marker within the first 400 characters delimits the answer.
    index_end = content_after_help.find("Question")
    if index_end != -1 and index_end <= 400:
        answer = content_after_help[:index_end]
    else:
        answer = content_after_help[:400]

    print(answer)
    st.write(answer)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|