Spaces:
Sleeping
Sleeping
import os

import streamlit as st
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_groq import ChatGroq
# ---------------------------------------------------------------------------
# One-time setup: load PDFs, chunk them, build the FAISS index, init the LLM.
# Runs at module import on every Streamlit rerun.
# ---------------------------------------------------------------------------

# Load every PDF found in the "Dataset" directory.
loader = PyPDFDirectoryLoader("Dataset")
docx = loader.load()

# Split into overlapping chunks so retrieved passages are small enough to
# fit comfortably in the LLM prompt while keeping local context (200 overlap).
text_sp = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = text_sp.split_documents(docx)

if not chunks:
    st.error("No chunks were created. Please check the documents or text splitter settings.")
    # Halt the script here; previously execution fell through and later code
    # raised NameError on the undefined vector store.
    st.stop()

# One synthetic provenance tag per chunk.
metadatas = [{"source": f"{i}-pl"} for i in range(len(chunks))]

# CPU-only MiniLM sentence embeddings.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={"device": "cpu"},
)

try:
    vector_store = FAISS.from_documents(chunks, embeddings)
except Exception as e:
    # Nothing downstream can work without the index, so report and halt
    # instead of continuing with vector_store undefined.
    st.error(f"Error in creating vector store: {str(e)}")
    st.stop()

# SECURITY: the Groq API key was previously hard-coded in this file; a key
# committed to source must be treated as leaked and revoked. Read it from the
# environment (set GROQ_API_KEY in the host/Space secrets) instead.
llm = ChatGroq(
    model_name="mixtral-8x7b-32768",
    groq_api_key=os.environ.get("GROQ_API_KEY", ""),
)
def retrieve(query, vector_store, embeddings, k=5):
    """Return the *k* documents in *vector_store* most similar to *query*.

    Args:
        query: The user's question as plain text.
        vector_store: A vector store exposing ``similarity_search_by_vector``.
        embeddings: An embedding model exposing ``embed_query``.
        k: Number of documents to retrieve (default 5, the previous
            hard-coded value — kept for backward compatibility).

    Returns:
        The list of matching documents, most similar first.
    """
    query_embedding = embeddings.embed_query(query)
    return vector_store.similarity_search_by_vector(query_embedding, k=k)
| from langchain.schema import HumanMessage, SystemMessage | |
def generate_response(query, retrieved_docs, llm):
    """Answer *query* with *llm*, grounding the prompt in the retrieved docs.

    Args:
        query: The user's question.
        retrieved_docs: Documents whose ``page_content`` is concatenated
            (space-separated) into the prompt context.
        llm: A LangChain chat model.

    Returns:
        The model's answer text, stripped of surrounding whitespace.
    """
    context = " ".join(doc.page_content for doc in retrieved_docs)
    messages = [
        SystemMessage(content="You are an expert in prompt engineering."),
        HumanMessage(content=f"Context: {context}\n\nQuestion: {query}\n\nAnswer:"),
    ]
    # Use .invoke(): calling the model directly (llm(messages)) is the
    # deprecated __call__ path in modern LangChain.
    response = llm.invoke(messages)
    return response.content.strip()
# ---------------------------------------------------------------------------
# Streamlit UI: page title, sidebar information panels, and the Q&A flow.
# NOTE(review): the emoji literals below look mojibake (UTF-8 decoded as
# Latin-1) — confirm the intended glyphs; reproduced here unchanged.
# ---------------------------------------------------------------------------
st.title("PromptGuru ππ")

st.sidebar.markdown("PromptGuru By OpenRAG π£οΈ")

# Sidebar copy, rendered in order as separate markdown panels.
_SIDEBAR_SECTIONS = (
    """
PromptGuru is a tool you can use for asking any queries related Prompt Engineering and Get it solved within a couple of minutes.
""",
    """
Note -- This tool is in a beta stage. Kindly have some patience while generating the response and give the model time to think.
""",
    """
π§ **Get in Touch**
For inquiries or collaboration proposals, please don't hesitate to reach out to us:
π© Email: openrag189@gmail.com
π LinkedIn: [OpenRAG](https://www.linkedin.com/company/102036854/admin/dashboard/)
πΈ Instagram: [OpenRAG](https://www.instagram.com/open.rag?igsh=MnFwMHd5cjU1OGFj)
Experience the future of Human-Chatbot Interaction with OpenRAG.
""",
)
for _section in _SIDEBAR_SECTIONS:
    st.sidebar.markdown(_section)

# Main Q&A flow: retrieve supporting passages, then generate an answer.
query = st.text_input("Ask your question about prompt engineering:")
if query:
    with st.spinner("Retrieving documents..."):
        retrieved_docs = retrieve(query, vector_store, embeddings)
    with st.spinner("Generating response..."):
        response = generate_response(query, retrieved_docs, llm)
    st.write("Response:", response)