|
|
|
|
|
from langchain.document_loaders import TextLoader |
|
|
from langchain.text_splitter import CharacterTextSplitter |
|
|
from langchain.embeddings import HuggingFaceEmbeddings |
|
|
from langchain.vectorstores import FAISS |
|
|
from langchain import HuggingFaceHub |
|
|
from langchain.chains.question_answering import load_qa_chain |
|
|
import streamlit as st |
|
|
# Input/output paths on Google Drive as mounted in Colab.
# NOTE(review): apparently intended for convert_to_utf8 below, but no call
# with these paths is visible in this chunk — confirm against the full file.
input_file="/content/drive/MyDrive/Python_Code/streamlit_01/T1.txt"


output_file="/content/drive/MyDrive/Python_Code/streamlit_01/T2.txt"


import os


# Hugging Face Hub token comes from Streamlit secrets (key "Key2");
# the langchain HuggingFaceHub client reads it from this env var.
os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets["Key2"]
|
|
|
|
|
|
|
|
def convert_to_utf8(input_file, output_file, input_encoding=None):
    """Re-encode a text file as UTF-8.

    Parameters
    ----------
    input_file : str
        Path of the file to read. Decoded with ``input_encoding``; when
        ``None`` the platform default is used (the original behavior, which
        is locale-dependent — pass the real source encoding when known).
    output_file : str
        Path the UTF-8 copy is written to.
    input_encoding : str | None
        New, backward-compatible parameter: explicit encoding of the source
        file. Defaults to ``None`` so existing callers are unchanged.

    Errors are reported via ``print`` instead of raised (best-effort, as in
    the original).
    """
    try:
        # Read the whole file; decoding problems surface here.
        with open(input_file, 'r', encoding=input_encoding) as file:
            text = file.read()

        # Write the same text back, explicitly encoded as UTF-8.
        with open(output_file, 'w', encoding='utf-8') as file:
            file.write(text)

        print(f"تبدیل فایل {input_file} به فرمت UTF-8 با موفقیت انجام شد و در فایل {output_file} ذخیره شد.")
    except Exception as e:
        # Deliberate best-effort: report the failure rather than crash.
        print(f"خطا در تبدیل فایل به فرمت UTF-8: {str(e)}")
|
|
|
|
|
|
|
|
|
|
|
def load_file(path='d2.txt'):
    """Load a plain-text file into a list of LangChain documents.

    Parameters
    ----------
    path : str
        File to load. Previously hard-coded to ``'d2.txt'``; now a
        parameter with that value as the default, so existing zero-argument
        callers behave exactly as before.

    Returns
    -------
    list
        The documents produced by ``TextLoader.load()``.
    """
    loader = TextLoader(path)
    documents = loader.load()
    return documents
|
|
|
|
|
# Module-level side effect: the source document is loaded once at import time.
documents=load_file()


# Splitting parameters: ~2000-character chunks with a 200-character overlap
# so answers that straddle a chunk boundary are still retrievable.
chunk_size=2000


chunk_overlap=200
|
|
def build_model(documents,chunk_size,chunk_overlap):
    """Chunk the documents, embed each chunk, and index them with FAISS.

    Parameters
    ----------
    documents : list
        Documents as returned by ``load_file``.
    chunk_size : int
        Maximum characters per chunk.
    chunk_overlap : int
        Characters shared between consecutive chunks.

    Returns
    -------
    FAISS
        A vector store supporting ``similarity_search``.
    """
    splitter = CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    chunks = splitter.split_documents(documents)

    # Default HuggingFace embedding model (no arguments given).
    embedder = HuggingFaceEmbeddings()

    # One FAISS entry per chunk; queries retrieve the nearest chunks.
    vector_index = FAISS.from_documents(chunks, embedder)
    return vector_index
|
|
|
|
|
# Module-level side effect: build the FAISS index once so every query reuses it.
db=build_model(documents,chunk_size,chunk_overlap)
|
|
|
|
|
|
|
|
def load_model(query,db):
    """Answer ``query`` with retrieval-augmented QA over the FAISS index ``db``.

    Parameters
    ----------
    query : str
        The user's question.
    db : FAISS
        Vector store built by ``build_model``.

    Returns
    -------
    str
        The chain's answer text.
    """
    # BUG FIX: the previous repo_id ("sentence-transformers/all-MiniLM-L6-v2")
    # is a sentence-*embedding* model that cannot generate text, so the QA
    # chain could not produce answers. Use a text2text-generation model
    # instead. TODO confirm model choice with the owner.
    llm = HuggingFaceHub(repo_id="google/flan-t5-base",
                         model_kwargs={"temperature": 0.1, "max_length": 400})
    # "stuff" chain type: all retrieved chunks are concatenated into one prompt.
    chain = load_qa_chain(llm, chain_type="stuff")
    # Retrieve the chunks most similar to the query.
    docs = db.similarity_search(query)
    out = chain.run(input_documents=docs, question=query)
    return out
|
|
|
|
|
|
|
|
def find_help(text):
    """Extract the model's answer from raw LLM output and display it.

    Searches ``text`` for the "Helpful Answer:" marker. Everything after the
    marker — up to a following "Question" marker when one occurs within the
    first 400 characters, otherwise capped at 400 characters — is printed to
    stdout and written to the Streamlit page. If the marker is absent, a
    (Persian) notice is printed instead.

    Fixes over the original: the redundant function-local
    ``import streamlit as st`` is removed (streamlit is already imported at
    module level), and the slice ``index_end + len("Question") - 8`` is
    simplified to ``index_end`` (``len("Question") == 8``, so the values are
    identical); the slice is also computed once instead of twice.
    """
    marker = "Helpful Answer:"
    index_help = text.find(marker)

    if index_help == -1:
        # Marker absent — original Persian message kept verbatim.
        print("کلمه 'help' در متن یافت نشد.")
        return

    content_after_help = text[index_help + len(marker):]

    # A "Question" marker within the first 400 characters delimits the answer.
    index_end = content_after_help.find("Question")
    if index_end != -1 and index_end <= 400:
        answer = content_after_help[:index_end]
    else:
        answer = content_after_help[:400]

    print(answer)
    st.write(answer)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|