Spaces:

nivakaran
/

Portfolio-Chatbot

Sleeping

App Files Files Community

Portfolio-Chatbot / src /streamlit_app.py

nivakaran

Update src/streamlit_app.py

d68a3d3 verified 5 months ago

raw

history blame contribute delete

7.1 kB

	import os
	import re
	import logging
	from uuid import uuid4
	from pathlib import Path
	from dotenv import load_dotenv
	import streamlit as st

	from langchain.chains import create_history_aware_retriever, create_retrieval_chain
	from langchain.chains.combine_documents import create_stuff_documents_chain
	from langchain_community.chat_message_histories import ChatMessageHistory
	from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
	from langchain_groq import ChatGroq
	from langchain_huggingface import HuggingFaceEmbeddings
	from langchain_text_splitters import RecursiveCharacterTextSplitter
	from langchain_community.document_loaders import PyPDFLoader
	from langchain_chroma import Chroma
	import torch

	# Configure logging
	# basicConfig sets the root logger threshold to INFO; `logger` is the
	# module-level logger the rest of this script uses to report failures.
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	# Set up proper cache directories
	def setup_environment(cache_root: "str | os.PathLike[str]" = "/tmp/cache") -> None:
	    """Create the cache root and point HF_HOME / STREAMLIT_HOME inside it.

	    Args:
	        cache_root: Directory that will contain the ``huggingface`` and
	            ``streamlit`` cache locations. Defaults to ``/tmp/cache``.

	    Side effects:
	        Creates ``cache_root`` (including any missing parent directories)
	        and overwrites the ``HF_HOME`` and ``STREAMLIT_HOME`` environment
	        variables with paths below it.
	    """
	    cache_dir = Path(cache_root)
	    # parents=True: a missing intermediate directory must not abort startup.
	    # exist_ok=True: the script is executed repeatedly, so this must be
	    # harmless when the directory is already there.
	    cache_dir.mkdir(parents=True, exist_ok=True)
	    os.environ["HF_HOME"] = str(cache_dir / "huggingface")
	    os.environ["STREAMLIT_HOME"] = str(cache_dir / "streamlit")

	setup_environment()

	# Merge values from a .env file (if one exists) into the process
	# environment, then read the two settings this script needs.
	load_dotenv()
	GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
	PDF_PATH = os.environ.get("PDF_PATH", "nivakaran.pdf")

	# The API key has no default: without it, show an error and halt the run.
	if not GROQ_API_KEY:
	    st.error("Missing required environment variables")
	    st.stop()

	# Halt early with a readable message when the source PDF is absent.
	if not Path(PDF_PATH).exists():
	    st.error(f"PDF file not found at: {PDF_PATH}")
	    st.stop()

	# Initialize RAG components with proper device handling
	# MPS-related PyTorch settings; the embedding model itself is pinned to the
	# CPU through model_kwargs below.
	os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'
	os.environ['PYTORCH_MPS_HIGH_WATERMARK_RATIO'] = '0.0'


	@st.cache_resource(show_spinner=False)
	def _load_embeddings():
	    """Build the sentence-transformers embedding model once per server process.

	    Streamlit re-executes this script on every user interaction; caching the
	    model as a resource avoids re-instantiating it for each chat message.
	    show_spinner=False keeps the cache layer from drawing a UI element at
	    this point in the script.
	    """
	    return HuggingFaceEmbeddings(
	        model_name="sentence-transformers/all-MiniLM-L6-v2",
	        model_kwargs={'device': 'cpu'},
	        encode_kwargs={'normalize_embeddings': True},
	    )


	try:
	    embeddings = _load_embeddings()
	except Exception as e:
	    # Top-level boundary: log the cause, show a generic message, halt the run.
	    logger.error(f"Failed to initialize embeddings: {str(e)}")
	    st.error("Failed to initialize embeddings. Please try again later.")
	    st.stop()

	# Chat model used for both question rewriting and answering.
	llm = ChatGroq(model_name="Deepseek-R1-Distill-Llama-70b", temperature=0.1)

	# Process PDF into vectorstore
	@st.cache_resource(show_spinner=False)
	def _build_vectorstore(file_path: str):
	    """Load, split, embed and index the PDF; cached once per server process.

	    Streamlit re-runs the script on every interaction. Without caching, each
	    rerun would re-embed the PDF and add the same chunks again to the
	    persistent collection under /tmp/chroma_db, so retrieval would return
	    duplicate passages. Errors propagate to the caller.
	    """
	    pages = PyPDFLoader(file_path).load()
	    # NOTE(review): all-MiniLM-L6-v2 is documented to truncate long inputs;
	    # 5000-character chunks may only be partially embedded - confirm and
	    # consider a smaller chunk_size.
	    splitter = RecursiveCharacterTextSplitter(chunk_size=5000, chunk_overlap=500)
	    chunks = splitter.split_documents(pages)

	    store = Chroma.from_documents(
	        documents=chunks,
	        embedding=embeddings,
	        persist_directory="/tmp/chroma_db",
	    )
	    logger.info(f"PDF {file_path} processed successfully")
	    return store


	def process_pdf(file_path: str):
	    """Return a Chroma vector store built from the PDF at ``file_path``.

	    Args:
	        file_path: Path of the PDF to index.

	    Returns:
	        The vector store holding the embedded chunks of the document. The
	        store is built on first use and reused on later calls with the same
	        path within the same server process.

	    On any failure the error is logged, a generic message is shown in the
	    UI, and the current script run is halted via ``st.stop()``.
	    """
	    try:
	        return _build_vectorstore(file_path)
	    except Exception as e:
	        logger.error(f"Failed to process PDF: {str(e)}")
	        st.error("PDF processing failed")
	        st.stop()

	# Index the PDF and expose the index as a retriever that is asked for
	# k=3 results per query.
	try:
	    vectorstore = process_pdf(PDF_PATH)
	    retriever = vectorstore.as_retriever(search_kwargs=dict(k=3))
	except Exception as e:
	    # Anything that goes wrong here leaves the app unusable: log it, tell
	    # the visitor, and halt this script run.
	    logger.error(f"Failed to initialize vectorstore: {str(e)}")
	    st.error("Failed to initialize document store. Please try again later.")
	    st.stop()

	# System prompt for the assistant.
	# The text is used as the "system" message of the QA prompt built further
	# down in this file. `{context}` is a template placeholder and must stay
	# literal here; presumably the document chain fills it with the retrieved
	# passages - confirm against the chain construction.
	system_prompt = """You are Max, a friendly and professional chatbot designed to
	assist visitors to Nivakaran's portfolio website. Your primary goal
	is to provide accurate, clear, and helpful information about Nivakaran, based
	on the following context:

	{context}

	Your responses should be:
	1. Informative and relevant, directly addressing the visitor's questions about Nivakaran's skills,
	projects, experience, and background.
	2. Concise but thorough enough to give visitors a clear understanding of Nivakaran's expertise.
	3. Engaging and approachable, maintaining a professional yet conversational tone.
	4. Honest about what is available in the provided context; if you don't know an answer, politely
	say so and suggest the visitor explore other sections of the portfolio or contact Nivakaran directly.
	5. Focused on helping visitors understand Nivakaran's capabilities and what makes him stand out
	as a developer and professional.
	6. Ready to provide examples, explanations, or links to portfolio projects when relevant.

	Avoid providing generic or unrelated information. Always tailor your answers to
	highlight Nivakaran's strengths and the unique value he brings.
	"""

	# Page metadata first, then the visible heading.
	st.set_page_config(page_title="Nivakaran's Portfolio Assistant", page_icon="💬")
	st.title("💬 Nivakaran's Portfolio Assistant")

	# Seed per-session state on the first run only: a random session id and an
	# empty transcript.
	if "session_id" not in st.session_state:
	    st.session_state["session_id"] = str(uuid4())
	if "history" not in st.session_state:
	    st.session_state["history"] = ChatMessageHistory()

	# Replay the stored transcript so earlier turns stay visible after a rerun.
	# Messages of type "human" render as the user; everything else renders as
	# the assistant.
	for message in st.session_state["history"].messages:
	    if message.type == "human":
	        role = "user"
	    else:
	        role = "assistant"
	    with st.chat_message(role):
	        st.markdown(message.content)

	# Patterns for model artifacts that must not reach the visitor.
	# DOTALL lets a reasoning block span several lines; the lazy `.*?` stops at
	# the first closing tag, and `\s*` removes any whitespace that follows it.
	_THINK_BLOCK_RE = re.compile(r"<think>.*?</think>\s*", flags=re.DOTALL)
	# Special tokens of the form <|...|>. The pipes are escaped so they are
	# matched literally instead of acting as regex alternation.
	_SPECIAL_TOKEN_RE = re.compile(r"<\|.*?\|>")


	def _strip_model_artifacts(raw_answer: str) -> str:
	    """Return ``raw_answer`` without <think> blocks and <|...|> tokens.

	    Args:
	        raw_answer: Text exactly as produced by the model.

	    Returns:
	        The text with every ``<think>...</think>`` block (plus trailing
	        whitespace) and every ``<|...|>`` token removed, stripped of leading
	        and trailing whitespace.
	    """
	    without_reasoning = _THINK_BLOCK_RE.sub("", raw_answer).strip()
	    return _SPECIAL_TOKEN_RE.sub("", without_reasoning).strip()


	# User input
	if user_input := st.chat_input("Ask me something about Nivakaran..."):
	    with st.chat_message("user"):
	        st.markdown(user_input)

	    # Snapshot the previous turns BEFORE recording the new question, so the
	    # question is not sent twice (once inside chat_history, once as input).
	    # Slicing yields a new list, so the later append does not affect it.
	    last_messages = st.session_state.history.messages[-6:]
	    st.session_state.history.add_user_message(user_input)

	    try:
	        # Contextualize question based on history
	        contextualize_q_prompt = ChatPromptTemplate.from_messages([
	            ("system", "Given a chat history and the latest user question which might reference context in the chat history, formulate a standalone question which can be understood without the chat history. Return just the question and nothing else."),
	            MessagesPlaceholder("chat_history"),
	            ("human", "{input}"),
	        ])

	        history_aware_retriever = create_history_aware_retriever(
	            llm, retriever, contextualize_q_prompt
	        )

	        # RAG chain
	        qa_prompt = ChatPromptTemplate.from_messages([
	            ("system", system_prompt),
	            MessagesPlaceholder("chat_history"),
	            ("human", "{input}"),
	        ])

	        question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
	        rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

	        result = rag_chain.invoke({
	            "input": user_input,
	            "chat_history": last_messages,
	        })

	        raw_answer = result["answer"]
	        # Clean out <think>...</think> blocks and special-token artifacts
	        cleaned_answer = _strip_model_artifacts(raw_answer)

	        with st.chat_message("assistant"):
	            st.markdown(cleaned_answer)

	        # Store the cleaned text so replayed history matches what was shown.
	        st.session_state.history.add_ai_message(cleaned_answer)

	    except Exception as e:
	        # Top-level boundary for one chat turn: log the cause and show a
	        # generic message instead of a traceback.
	        logger.error(f"Error during RAG processing: {str(e)}")
	        st.error("Sorry, I encountered an error while processing your request. Please try again.")