"""Study Assistant — a Gradio RAG app over local study materials (runs as a Hugging Face Space)."""
import os

# Third-party: Gradio UI, LangChain RAG stack (loaders/splitter/FAISS/chain),
# and Hugging Face Transformers for the local Flan-T5 model.
import gradio as gr
from langchain_community.document_loaders import PyMuPDFLoader, TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain_community.llms import HuggingFacePipeline
from transformers import pipeline, AutoTokenizer
def load_documents(file_path="study_materials"):
    """Load every PDF and plain-text document found directly in *file_path*.

    Args:
        file_path: Directory scanned (non-recursively) for ``.pdf`` and
            ``.txt`` files.

    Returns:
        list: LangChain ``Document`` objects from all matching files; an
        empty list when the directory contains no supported files.
    """
    documents = []
    for filename in os.listdir(file_path):
        path = os.path.join(file_path, filename)
        # Match extensions case-insensitively so e.g. "NOTES.TXT" or
        # "slides.PDF" are not silently skipped.
        suffix = os.path.splitext(filename)[1].lower()
        if suffix == ".pdf":
            documents.extend(PyMuPDFLoader(path).load())
        elif suffix == ".txt":
            documents.extend(TextLoader(path).load())
    return documents
def create_qa_system():
    """Build the retrieval-QA chain over the local study materials.

    Pipeline: load documents -> split into overlapping chunks -> embed with
    MiniLM -> index in FAISS -> answer with a local Flan-T5 pipeline fed the
    top-3 retrieved chunks.

    Returns:
        RetrievalQA: chain whose ``invoke({"query": ...})`` result contains
        both the answer and the retrieved source documents.

    Raises:
        gr.Error: wraps any failure (no materials, model download error, ...)
            so Gradio can surface it in the UI.
    """
    try:
        # Load documents
        documents = load_documents()
        if not documents:
            # NOTE(review): original text had a mojibake char ("๐"); restored
            # to the books emoji used for source citations below.
            raise ValueError("📚 No study materials found")
        # Text splitting: ~1100-char chunks with 200-char overlap, split on
        # blank lines, so paragraph context survives chunk boundaries.
        text_splitter = CharacterTextSplitter(
            chunk_size=1100,
            chunk_overlap=200,
            separator="\n\n"
        )
        texts = text_splitter.split_documents(documents)
        # Embeddings: small, CPU-friendly sentence-transformer.
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )
        # Vector store
        db = FAISS.from_documents(texts, embeddings)
        # LLM setup with proper LangChain wrapper; device=-1 forces CPU.
        tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
        pipe = pipeline(
            "text2text-generation",
            model="google/flan-t5-base",
            tokenizer=tokenizer,
            max_length=600,
            temperature=0.7,
            do_sample=True,
            top_k=50,
            device=-1
        )
        # Wrap pipeline in LangChain component
        llm = HuggingFacePipeline(pipeline=pipe)
        # Create QA chain; return_source_documents lets ask_question cite the
        # files each answer came from.
        return RetrievalQA.from_llm(
            llm=llm,
            retriever=db.as_retriever(search_kwargs={"k": 3}),
            return_source_documents=True
        )
    except Exception as e:
        # Chain the original exception so the real cause stays in the traceback.
        raise gr.Error(f"Error: {str(e)}") from e
# Initialize system once at import time. If the index or models cannot be
# built there is nothing useful to serve, so log the failure and re-raise
# to stop the app from starting (fail fast).
try:
    qa = create_qa_system()
except Exception as e:
    print(f"Startup failed: {str(e)}")
    raise
def ask_question(question, history):
    """Chat callback: answer *question* from the indexed study materials.

    Args:
        question: The user's message.
        history: Prior chat turns (required by gr.ChatInterface; unused).

    Returns:
        str: The model's answer followed by a deduplicated, sorted list of
        source file paths, or a truncated error message on failure.
    """
    try:
        result = qa.invoke({"query": question})
        answer = result["result"]
        # Deduplicate via a set, then sort so the citation order is stable
        # across calls (set iteration order is arbitrary).
        sources = sorted({doc.metadata['source'] for doc in result['source_documents']})
        # NOTE(review): original text had a mojibake char ("๐"); restored to 📚.
        return f"{answer}\n\n📚 Sources: {', '.join(sources)}"
    except Exception as e:
        # Keep the chat UI responsive on failure; truncate long messages.
        return f"Error: {str(e)[:150]}"
# Launch the chat UI. ChatInterface calls ask_question with (message, history);
# launch() blocks and serves the app (the Hugging Face Space entry point).
gr.ChatInterface(
    ask_question,
    title="Study Assistant",
    description="Upload PDF/TXT files in 'study_materials' folder and ask questions!",
    theme="soft"
).launch()