Spaces:

JumaRubea
/

chatbot

Sleeping

App Files Files Community

JumaRubea committed on Sep 15, 2025

Commit

ac8c947

verified ·

1 Parent(s): 9d3e1f5

Upload 4 files

Browse files

Files changed (4) hide show

src/app.py +59 -0
src/me.txt +32 -0
src/rag_components.py +51 -0
src/requirements.txt +4 -0

src/app.py ADDED Viewed

	@@ -0,0 +1,59 @@

+import streamlit as st
+from rag_components import load_documents, split_documents, create_embeddings, setup_vector_store, create_qa_chain
+import os
+st.set_page_config(page_title="Document Chatbot")
+st.title("Chat with your Documents")
+@st.cache_resource
+def initialize_rag_components(file_path="me.txt"):
+    """Initializes and caches RAG components."""
+    if not os.path.exists(file_path):
+        st.error(f"Error: Document file not found at {file_path}")
+        return None, None
+    documents = load_documents(file_path)
+    docs = split_documents(documents)
+    embeddings = create_embeddings()
+    retriever = setup_vector_store(docs, embeddings)
+    qa_chain = create_qa_chain(retriever)
+    return qa_chain, retriever
+qa_chain, retriever = initialize_rag_components()
+if qa_chain is not None:
+    # Initialize chat history
+    if "messages" not in st.session_state:
+        st.session_state.messages = []
+    # Display chat messages from history on app rerun
+    for message in st.session_state.messages:
+        with st.chat_message(message["role"]):
+            st.markdown(message["content"])
+    # React to user input
+    if prompt := st.chat_input("Ask me a question about the document"):
+        # Display user message in chat message container
+        st.session_state.messages.append({"role": "user", "content": prompt})
+        with st.chat_message("user"):
+            st.markdown(prompt)
+        # Display assistant response in chat message container
+        with st.chat_message("assistant"):
+            message_placeholder = st.empty()
+            full_response = ""
+            try:
+                # Assuming qa_chain.stream() yields dictionaries with a 'result' key
+                for chunk in qa_chain.stream(prompt):
+                    if 'result' in chunk:
+                        full_response += chunk['result']
+                        message_placeholder.markdown(full_response + "▌")
+                message_placeholder.markdown(full_response)
+            except Exception as e:
+                st.error(f"An error occurred: {e}")
+                full_response = "Sorry, I could not process your request."
+        # Add assistant response to chat history
+        st.session_state.messages.append({"role": "assistant", "content": full_response})
+else:
+    st.warning("RAG components could not be initialized. Please check the document file path.")

src/me.txt ADDED Viewed

	@@ -0,0 +1,32 @@

+# About Me
+My name is Juma Rubea. I am passionate about artificial intelligence, software development, and data science.
+I currently live in Dar es Salaam, Tanzania, and work as a Junior Data Scientist.
+# Skills and Expertise
+- Programming Languages: Python, AI, ML, Data Science
+- AI/ML Tools: LangChain, Hugging Face Transformers, PyTorch, TensorFlow
+- Databases: PostgreSQL, MongoDB, Chroma, FAISS
+- Cloud & DevOps: AWS, Docker, Kubernetes
+# Education
+I studied [Your Degree, e.g., Computer Science] at [Your University].
+I have taken specialized courses in machine learning, natural language processing, and cloud computing.
+# Professional Experience
+- Data Science at SkyConnect (2 years)
+  - Worked on computer vision
+  - Built Sevia using Mask R-CNN, DeepLabv3, etc.
+# Projects
+- Chatbot Development: Created a chatbot using LangChain and Hugging Face.
+- RAG Systems: Implemented retrieval-augmented generation pipelines with TinyLlama.
+- Data Engineering: Built data pipelines for structured and unstructured data.
+# Hobbies & Interests
+In my free time, I enjoy reading tech blogs, playing chess, traveling, contributing to open source, and swimming.
+# Contact
+- Email: rubeajuma8@gmail.com
+- GitHub: github.jumarubea.com
+- LinkedIn: link.jumarubea.com

src/rag_components.py ADDED Viewed

	@@ -0,0 +1,51 @@

+from langchain.vectorstores import Chroma
+from langchain_huggingface import HuggingFaceEmbeddings
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.document_loaders import TextLoader
+from langchain_huggingface import HuggingFacePipeline
+from langchain.chains import RetrievalQA
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+def load_documents(file_path: str):
+    """Loads documents from a specified file path."""
+    loader = TextLoader(file_path)
+    return loader.load()
+def split_documents(documents, chunk_size=500, chunk_overlap=50):
+    """Splits documents into chunks."""
+    splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
+    return splitter.split_documents(documents)
+def create_embeddings(model_name="sentence-transformers/all-MiniLM-L6-v2"):
+    """Creates HuggingFace embeddings."""
+    return HuggingFaceEmbeddings(model_name=model_name)
+def setup_vector_store(docs, embeddings, persist_directory="./chroma_db"):
+    """Sets up and persists the Chroma vector store."""
+    db = Chroma.from_documents(docs, embeddings, persist_directory=persist_directory)
+    return db.as_retriever()
+def create_qa_chain(retriever, model_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0"):
+    """Creates the RetrievalQA chain with streaming capabilities."""
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = AutoModelForCausalLM.from_pretrained(
+        model_name,
+        device_map="auto",
+    )
+    pipe = pipeline(
+        "text-generation",
+        model=model,
+        tokenizer=tokenizer,
+        max_new_tokens=512,
+        temperature=0.7,
+        top_p=0.9
+    )
+    llm = HuggingFacePipeline(pipeline=pipe)
+    qa_chain = RetrievalQA.from_chain_type(
+        llm=llm,
+        retriever=retriever,
+        chain_type="stuff",
+        return_source_documents=True # Added to potentially help with streaming or understanding context
+    )
+    return qa_chain

src/requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+langchain==0.3.27
+langchain_huggingface==0.3.1
+streamlit==1.49.1
+transformers==4.56.1