Spaces:

ali121300
/

St3

Sleeping

File size: 4,100 Bytes

# Document Loader
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain import HuggingFaceHub
from langchain.chains.question_answering import load_qa_chain
import streamlit as st
# Hard-coded absolute paths: the text file to convert and the UTF-8 copy to write.
input_file="/content/drive/MyDrive/Python_Code/streamlit_01/T1.txt"
output_file="/content/drive/MyDrive/Python_Code/streamlit_01/T2.txt"
import os
# Copy the Streamlit secret "Key2" into the HUGGINGFACEHUB_API_TOKEN environment
# variable (presumably where the Hugging Face Hub client looks for its token).
os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets["Key2"]

################################################################################################
def convert_to_utf8(input_file, output_file, input_encoding=None):
    """Write a UTF-8 encoded copy of a text file.

    The conversion is best-effort: any failure is reported on stdout instead
    of being raised.

    Args:
        input_file: Path of the text file to read.
        output_file: Path of the UTF-8 file to create or overwrite.
        input_encoding: Encoding used to decode ``input_file``. ``None`` (the
            default) keeps the previous behaviour of decoding with the
            platform's default text encoding.

    Returns:
        ``True`` when the copy was written, ``False`` when an error was
        caught and reported.
    """
    try:
        # Decode the source with the requested (or platform default) encoding.
        with open(input_file, 'r', encoding=input_encoding) as file:
            text = file.read()

        # Write the decoded text back out as UTF-8.
        with open(output_file, 'w', encoding='utf-8') as file:
            file.write(text)

        print(f"تبدیل فایل {input_file} به فرمت UTF-8 با موفقیت انجام شد و در فایل {output_file} ذخیره شد.")
        return True
    except Exception as e:
        # Deliberate best-effort boundary: report the problem and let the
        # caller decide what to do with the False result.
        print(f"خطا در تبدیل فایل به فرمت UTF-8: {str(e)}")
        return False
# Module-level call: write a UTF-8 copy of input_file to output_file.
# NOTE(review): load_file() reads 'd2.txt', not output_file — confirm the copy is used.
convert_to_utf8(input_file=input_file,output_file=output_file)
######################################################################################################

def load_file(path='d2.txt'):
    """Load a text file with LangChain's ``TextLoader``.

    Args:
        path: File to load. Defaults to ``'d2.txt'``, the path that was
            previously hard-coded, so existing ``load_file()`` calls behave
            exactly as before.

    Returns:
        Whatever ``TextLoader(path).load()`` returns.
    """
    return TextLoader(path).load()
######################################################################################################
# Load the source documents once at module level.
documents=load_file()

# Splitter settings passed to build_model below.
chunk_size=2000
chunk_overlap=400
def build_model(documents, chunk_size, chunk_overlap):
    """Split the documents into chunks and index them in a FAISS store.

    Args:
        documents: Documents handed to
            ``CharacterTextSplitter.split_documents``.
        chunk_size: ``chunk_size`` forwarded to the splitter.
        chunk_overlap: ``chunk_overlap`` forwarded to the splitter.

    Returns:
        The result of ``FAISS.from_documents`` for the chunks, embedded with
        a default-constructed ``HuggingFaceEmbeddings``.
    """
    splitter = CharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    chunks = splitter.split_documents(documents)
    # Vector store reference:
    # https://python.langchain.com/en/latest/modules/indexes/vectorstores.html
    return FAISS.from_documents(chunks, HuggingFaceEmbeddings())
####################################################################################################
# Build the FAISS index over the loaded documents at module level.
db=build_model(documents,chunk_size,chunk_overlap)

##################################################################################################
def load_model(query, db):
    """Run a "stuff" question-answering chain for ``query`` against ``db``.

    A new ``HuggingFaceHub`` LLM and QA chain are created on every call.

    Args:
        query: Question text; used both for the similarity search and as the
            chain's ``question`` input.
        db: Object providing ``similarity_search(query)``.

    Returns:
        The value returned by the chain's ``run`` call.
    """
    hub_llm = HuggingFaceHub(
        repo_id="google/gemma-7b",
        model_kwargs={"temperature": 0.1, "max_length": 600},
    )
    qa_chain = load_qa_chain(hub_llm, chain_type="stuff")
    # Documents returned by the similarity search become the chain's context.
    matches = db.similarity_search(query)
    return qa_chain.run(input_documents=matches, question=query)
##############################################################################################

def find_help(text, max_chars=400):
    """Display the text that follows the first "Helpful Answer:" marker.

    The text after the marker is cut at the first "Question" found within
    ``max_chars`` characters, or at ``max_chars`` characters when there is no
    such "Question". The result is printed and written to the Streamlit page.
    When the marker is absent, only a console message is printed.

    Args:
        text: Raw text to search.
        max_chars: Maximum number of characters shown after the marker.
            Defaults to 400, the limit that was previously hard-coded.

    Returns:
        The displayed text, or ``None`` when the marker is not present.
    """
    marker = "Helpful Answer:"
    marker_pos = text.find(marker)
    if marker_pos == -1:
        print("کلمه 'help' در متن یافت نشد.")
        return None

    # Function-scope import, as in the original; only the display path needs it.
    import streamlit as st

    after_marker = text[marker_pos + len(marker):]

    # Stop at the next "Question" when it starts within the limit; otherwise
    # fall back to the first max_chars characters.
    question_pos = after_marker.find("Question")
    cut = question_pos if 0 <= question_pos <= max_chars else max_chars
    answer = after_marker[:cut]

    print(answer)
    st.write(answer)
    return answer

################################################################################################