Spaces:
Sleeping
Sleeping
| import os | |
| import re | |
| import logging | |
| from uuid import uuid4 | |
| from pathlib import Path | |
| from dotenv import load_dotenv | |
| import streamlit as st | |
| from langchain.chains import create_history_aware_retriever, create_retrieval_chain | |
| from langchain.chains.combine_documents import create_stuff_documents_chain | |
| from langchain_community.chat_message_histories import ChatMessageHistory | |
| from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder | |
| from langchain_groq import ChatGroq | |
| from langchain_huggingface import HuggingFaceEmbeddings | |
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| from langchain_community.document_loaders import PyPDFLoader | |
| from langchain_chroma import Chroma | |
| import torch | |
# Logging: send INFO-and-above records through the root configuration and
# expose a module-scoped logger for the rest of this script.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Set up proper cache directories
def setup_environment(cache_root: "str | Path" = "/tmp/cache") -> None:
    """Create the cache root and point Hugging Face / Streamlit at it.

    Args:
        cache_root: Directory under which the ``huggingface`` and
            ``streamlit`` cache locations are placed. Defaults to
            ``/tmp/cache``, the location this script has always used.

    Side effects:
        Creates ``cache_root`` (including missing parents) and sets the
        ``HF_HOME`` and ``STREAMLIT_HOME`` environment variables.
    """
    cache_dir = Path(cache_root)
    # parents=True so a not-yet-existing parent directory does not raise.
    cache_dir.mkdir(parents=True, exist_ok=True)
    # NOTE(review): this runs after the Hugging Face packages were imported at
    # the top of the file; if they read HF_HOME at import time the override
    # may come too late -- confirm, and consider calling this before those
    # imports.
    os.environ['HF_HOME'] = str(cache_dir / "huggingface")
    os.environ['STREAMLIT_HOME'] = str(cache_dir / "streamlit")
# Redirect caches before any model download or Streamlit state is touched.
setup_environment()

# Load environment variables (python-dotenv merges a local .env file, if any,
# into os.environ).
load_dotenv()
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
PDF_PATH = os.getenv("PDF_PATH", "nivakaran.pdf")  # bare filename, relative to the working directory

# Validate environment variables: an unset or empty key is treated as missing.
if not GROQ_API_KEY:
    st.error("Missing required environment variables")
    st.stop()

# Verify the PDF exists. is_file() (rather than exists()) also rejects a
# directory that happens to sit at that path, which would otherwise only fail
# later inside the PDF loader.
if not Path(PDF_PATH).is_file():
    st.error(f"PDF file not found at: {PDF_PATH}")
    st.stop()
# Initialize RAG components with proper device handling
@st.cache_resource(show_spinner=False)
def _load_embeddings():
    """Build the sentence-transformers embedding model once per server process.

    Streamlit re-executes this script on every interaction; caching the model
    as a resource avoids reloading it for each chat message.
    ``show_spinner=False`` keeps the cached call from emitting a UI element
    ahead of ``st.set_page_config``.
    """
    # MPS-related PyTorch settings carried over from the original script.
    # NOTE(review): these do not themselves force CPU execution -- the
    # ``device: cpu`` entry below does that. Confirm whether they are needed.
    os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'
    os.environ['PYTORCH_MPS_HIGH_WATERMARK_RATIO'] = '0.0'
    return HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={'device': 'cpu'},
        encode_kwargs={'normalize_embeddings': True},
    )


try:
    embeddings = _load_embeddings()
except Exception as exc:
    # logger.exception records the traceback as well as the message.
    logger.exception("Failed to initialize embeddings: %s", exc)
    st.error("Failed to initialize embeddings. Please try again later.")
    st.stop()

# Chat model used both for question rewriting and for answering.
llm = ChatGroq(model_name="Deepseek-R1-Distill-Llama-70b", temperature=0.1)
# Process PDF into vectorstore
@st.cache_resource(show_spinner=False)
def _build_vectorstore(file_path: str):
    """Load, split and embed the PDF; cached so it runs once per path.

    Without the cache every Streamlit rerun (i.e. every chat message) would
    re-embed the document and add the same chunks again to the persisted
    Chroma directory, so retrieval would return duplicates.
    NOTE(review): the on-disk store in /tmp/chroma_db can still accumulate
    duplicates across process restarts -- consider deterministic ids.
    """
    pages = PyPDFLoader(file_path).load()
    splitter = RecursiveCharacterTextSplitter(chunk_size=5000, chunk_overlap=500)
    chunks = splitter.split_documents(pages)
    store = Chroma.from_documents(
        documents=chunks,
        embedding=embeddings,
        persist_directory="/tmp/chroma_db",
    )
    # Logged here so the message appears only when the store is really built.
    logger.info("PDF %s processed successfully", file_path)
    return store


def process_pdf(file_path: str):
    """Return a Chroma vectorstore built from the PDF at ``file_path``.

    Args:
        file_path: Path of the PDF to index.

    Returns:
        The vectorstore holding the embedded chunks of the document.

    On any failure the error is logged with its traceback, a message is shown
    in the Streamlit page, and the script run is halted via ``st.stop()``.
    """
    try:
        return _build_vectorstore(file_path)
    except Exception as exc:
        logger.exception("Failed to process PDF: %s", exc)
        st.error("PDF processing failed")
        st.stop()
# Initialize vectorstore and retriever
try:
    vectorstore = process_pdf(PDF_PATH)
    # Retrieve the three most similar chunks for each query.
    retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
except Exception as exc:
    # logger.exception keeps the traceback, which the f-string form dropped.
    logger.exception("Failed to initialize vectorstore: %s", exc)
    st.error("Failed to initialize document store. Please try again later.")
    st.stop()
# --- Prompt definition and Streamlit chat UI ---
# System prompt for the assistant.
# Used as the system message of the question-answering prompt. `{context}` is
# the only template variable; the document-combining chain is expected to fill
# it with the retrieved PDF chunks. Any other literal braces added to this
# text would be treated as template variables as well.
system_prompt = """You are Max, a friendly and professional chatbot designed to
assist visitors to Nivakaran's portfolio website. Your primary goal
is to provide accurate, clear, and helpful information about Nivakaran, based
on the following context:
{context}
Your responses should be:
1. Informative and relevant, directly addressing the visitor's questions about Nivakaran's skills,
projects, experience, and background.
2. Concise but thorough enough to give visitors a clear understanding of Nivakaran's expertise.
3. Engaging and approachable, maintaining a professional yet conversational tone.
4. Honest about what is available in the provided context; if you don't know an answer, politely
say so and suggest the visitor explore other sections of the portfolio or contact Nivakaran directly.
5. Focused on helping visitors understand Nivakaran's capabilities and what makes him stand out
as a developer and professional.
6. Ready to provide examples, explanations, or links to portfolio projects when relevant.
Avoid providing generic or unrelated information. Always tailor your answers to
highlight Nivakaran's strengths and the unique value he brings.
"""
# Streamlit app UI
# NOTE(review): the icon was mis-encoded in the source ("π¬"); restored as the
# speech-balloon emoji, which is presumably what was intended -- confirm.
st.set_page_config(page_title="Nivakaran's Portfolio Assistant", page_icon="💬")
st.title("💬 Nivakaran's Portfolio Assistant")
# Per-browser-session state: a random session id and the running transcript.
# Both are created only on the first run of a session and survive reruns.
if "session_id" not in st.session_state:
    st.session_state["session_id"] = str(uuid4())
if "history" not in st.session_state:
    st.session_state["history"] = ChatMessageHistory()

# Replay the stored transcript so earlier turns stay visible after a rerun.
for past_message in st.session_state["history"].messages:
    if past_message.type == "human":
        speaker = "user"
    else:
        speaker = "assistant"
    with st.chat_message(speaker):
        st.markdown(past_message.content)
# Reasoning block that the model may emit before its answer, plus stray
# special tokens of the form <|...|>. Compiled once at module level.
_THINK_BLOCK_RE = re.compile(r"<think>.*?</think>\s*", flags=re.DOTALL)
_SPECIAL_TOKEN_RE = re.compile(r"<\|.*?\|>")


def _clean_model_answer(raw_answer: str) -> str:
    """Strip <think>...</think> sections and <|...|> tokens from the reply."""
    without_thoughts = _THINK_BLOCK_RE.sub("", raw_answer).strip()
    return _SPECIAL_TOKEN_RE.sub("", without_thoughts).strip()


def _build_rag_chain():
    """Assemble the history-aware retrieval chain used to answer a question."""
    # Step 1: rewrite the latest question into a standalone one using history.
    contextualize_q_prompt = ChatPromptTemplate.from_messages([
        ("system", "Given a chat history and the latest user question which might reference context in the chat history, formulate a standalone question which can be understood without the chat history. Return just the question and nothing else."),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ])
    history_aware_retriever = create_history_aware_retriever(
        llm, retriever, contextualize_q_prompt
    )
    # Step 2: answer from the retrieved chunks under the assistant persona.
    qa_prompt = ChatPromptTemplate.from_messages([
        ("system", system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ])
    question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
    return create_retrieval_chain(history_aware_retriever, question_answer_chain)


# User input
if user_input := st.chat_input("Ask me something about Nivakaran..."):
    with st.chat_message("user"):
        st.markdown(user_input)
    # Snapshot the history window BEFORE recording the new message. Slicing
    # afterwards would put the current question into `chat_history` as well as
    # `{input}`, so the model would see it twice and the history would never
    # be empty on the first turn.
    prior_messages = st.session_state.history.messages[-6:]
    st.session_state.history.add_user_message(user_input)
    try:
        result = _build_rag_chain().invoke({
            "input": user_input,
            "chat_history": prior_messages,
        })
        cleaned_answer = _clean_model_answer(result["answer"])
        with st.chat_message("assistant"):
            st.markdown(cleaned_answer)
        st.session_state.history.add_ai_message(cleaned_answer)
    except Exception as exc:
        # Top-level UI boundary: log with traceback, show a generic message.
        logger.exception("Error during RAG processing: %s", exc)
        st.error("Sorry, I encountered an error while processing your request. Please try again.")