Spaces:

random2222
/

trry

Sleeping

App Files Files Community

trry / app.py

random2222

Update app.py

ecbad47 verified 10 months ago

raw

history blame contribute delete

2.44 kB

	import os
	import gradio as gr
	from langchain_community.vectorstores import FAISS
	from langchain_community.embeddings import HuggingFaceEmbeddings
	from langchain_community.document_loaders import PyMuPDFLoader
	from langchain_text_splitters import CharacterTextSplitter
	from langchain.chains import RetrievalQA
	from langchain_community.llms import HuggingFacePipeline
	from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

	def create_qa_system(
	    pdf_path: str = "file.pdf",
	    *,
	    embedding_model: str = "sentence-transformers/all-MiniLM-L6-v2",
	    llm_model: str = "google/flan-t5-small",
	    chunk_size: int = 300,
	    chunk_overlap: int = 50,
	    top_k: int = 2,
	):
	    """Build a retrieval question-answering chain over a single PDF.

	    The PDF is loaded, split into character chunks, embedded into a FAISS
	    index, and paired with a local seq2seq text-generation pipeline.

	    Args:
	        pdf_path: Path of the PDF to index.
	        embedding_model: Model name passed to ``HuggingFaceEmbeddings``.
	        llm_model: Model name used for both the tokenizer and the
	            seq2seq model.
	        chunk_size: ``chunk_size`` given to ``CharacterTextSplitter``.
	        chunk_overlap: ``chunk_overlap`` given to ``CharacterTextSplitter``.
	        top_k: Number of chunks the retriever is asked for (``k``).

	    Returns:
	        The ``RetrievalQA`` chain produced by
	        ``RetrievalQA.from_chain_type`` with ``chain_type="stuff"``.

	    Raises:
	        gr.Error: For any failure during setup; the original exception is
	            attached as ``__cause__``.
	    """
	    try:
	        # Validate PDF
	        if not os.path.exists(pdf_path):
	            raise FileNotFoundError("Upload PDF via Files tab")

	        # Process PDF
	        documents = PyMuPDFLoader(pdf_path).load()
	        if not documents:
	            raise ValueError("PDF is empty or corrupted")

	        # Split text
	        splitter = CharacterTextSplitter(
	            chunk_size=chunk_size,
	            chunk_overlap=chunk_overlap,
	        )
	        chunks = splitter.split_documents(documents)
	        if not chunks:
	            # Pages without extractable text (e.g. scanned images) leave
	            # nothing to index; fail here with a readable message instead
	            # of letting the vector-store build fail on an empty list.
	            raise ValueError("PDF contains no extractable text")

	        # Create embeddings
	        embeddings = HuggingFaceEmbeddings(model_name=embedding_model)

	        # Build vector store
	        vector_store = FAISS.from_documents(chunks, embeddings)

	        # Initialize local model with LangChain wrapper
	        tokenizer = AutoTokenizer.from_pretrained(llm_model)
	        model = AutoModelForSeq2SeqLM.from_pretrained(llm_model)

	        # NOTE(review): temperature normally only matters when sampling is
	        # enabled; do_sample is not set here, so this value may be ignored
	        # -- confirm against the installed transformers version.
	        pipe = pipeline(
	            "text2text-generation",
	            model=model,
	            tokenizer=tokenizer,
	            max_length=128,
	            temperature=0.2,
	            device_map="auto",
	        )

	        llm = HuggingFacePipeline(pipeline=pipe)

	        return RetrievalQA.from_chain_type(
	            llm=llm,
	            chain_type="stuff",
	            retriever=vector_store.as_retriever(search_kwargs={"k": top_k}),
	        )

	    except Exception as e:
	        # Surface every setup failure as one error type while keeping the
	        # underlying exception reachable through the cause chain.
	        raise gr.Error(f"Initialization failed: {str(e)}") from e

	# Build the QA chain once at import time; chat_response reads it as `qa`.
	try:
	    qa = create_qa_system()
	except Exception as init_error:
	    # Report the failure on stdout, then re-raise so it is not swallowed.
	    print(f"Fatal error: {str(init_error)}")
	    raise

	def chat_response(message, history):
	    """Answer one chat message using the module-level ``qa`` chain.

	    Args:
	        message: The user's question text.
	        history: Earlier conversation turns passed by the chat UI; unused.

	    Returns:
	        The chain's ``"result"`` value on success; a short prompt when the
	        message is missing or blank; otherwise a warning-prefixed error
	        string containing the first 100 characters of the exception text.
	    """
	    # Skip retrieval and generation entirely when there is nothing to ask.
	    if message is None or not str(message).strip():
	        return "Please enter a question."

	    try:
	        # `invoke` replaces calling the chain object directly, which
	        # LangChain has deprecated.
	        response = qa.invoke({"query": message})
	        return response["result"]
	    except Exception as e:
	        # Log the full error, but show the user only a truncated version.
	        print(f"Error during query: {str(e)}")
	        return f"⚠️ Error: {str(e)[:100]}"

	# Wrap the handler in a Gradio chat UI and start serving it.
	demo = gr.ChatInterface(fn=chat_response)
	demo.launch()