udituen committed on
Commit
362de84
Β·
1 Parent(s): ec4695f

code refactor

Browse files
src/streamlit_app.py β†’ app_archive.py RENAMED
File without changes
chains/qa_chain.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Conversational QA chain setup."""
2
+
3
+ from langchain.chains import ConversationalRetrievalChain
4
+ from langchain.memory import ConversationBufferMemory
5
+
6
+
7
def create_qa_chain(llm, retriever):
    """Build a ConversationalRetrievalChain backed by buffer memory.

    Args:
        llm: Language model used to generate answers.
        retriever: Document retriever supplying context passages.

    Returns:
        ConversationalRetrievalChain: Chain that keeps chat history in
        memory and also returns the source documents for each answer.
    """
    # Memory stores Q/A turns under "chat_history"; output_key tells it
    # which chain output to persist (the chain emits several keys).
    conversation_memory = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True,
        output_key="answer",
    )

    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        memory=conversation_memory,
        return_source_documents=True,
    )
config.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Configuration settings for the RAG application."""

# Built-in demo document offered for download in the sidebar.
SAMPLE_TEXT = """Fertilizers help improve soil nutrients and crop yield.
Irrigation methods vary depending on climate and crop type.
Crop rotation can enhance soil health and reduce pests.
Composting is an organic way to enrich the soil.
Weed management is essential for higher productivity."""

# Suggested prompts shown once a document has been processed.
EXAMPLE_QUESTIONS = [
    "What is this document about?",
    "What is the role of fertilizers in agriculture?",
    "Why is crop rotation important?",
    "How does composting help farming?",
]


# Model configurations
QWEN_MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"  # HuggingFace model id for the chat LLM
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"  # sentence-transformers embedding model

# Generation parameters
MAX_NEW_TOKENS = 256  # cap on tokens generated per answer
TEMPERATURE = 0.7  # sampling temperature
TOP_P = 0.95  # nucleus-sampling cutoff
25
+
models/llm_loader.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """LLM loading and initialization."""
2
+
3
+ import torch
4
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
5
+ from langchain_community.llms import HuggingFacePipeline
6
+ import streamlit as st
7
+
8
+
9
@st.cache_resource
def load_qwen_llm(model_name, max_new_tokens=256, temperature=0.7, top_p=0.95):
    """Load a Qwen causal LM and wrap it as a LangChain LLM.

    Cached with st.cache_resource so the weights are loaded only once
    per Streamlit server process.

    Args:
        model_name: HuggingFace model identifier.
        max_new_tokens: Maximum tokens to generate per call.
        temperature: Sampling temperature.
        top_p: Nucleus sampling parameter.

    Returns:
        HuggingFacePipeline: Wrapped LLM usable in LangChain chains.
    """
    use_gpu = torch.cuda.is_available()

    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

    # Half precision and automatic device placement only when CUDA is present;
    # on CPU we stay in float32 and let transformers pick the default device.
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16 if use_gpu else torch.float32,
        device_map="auto" if use_gpu else None,
        trust_remote_code=True,
    )

    text_generator = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
        return_full_text=False,  # emit only the newly generated text, not the prompt
    )

    return HuggingFacePipeline(pipeline=text_generator)
models/retriever.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Document retrieval system."""
2
+
3
+ from langchain_community.vectorstores import FAISS
4
+ from langchain_community.embeddings import HuggingFaceEmbeddings
5
+
6
+
7
def build_retriever(docs, embedding_model_name="all-MiniLM-L6-v2"):
    """Create a FAISS-backed retriever over the given texts.

    Args:
        docs: List of text documents.
        embedding_model_name: Name of the sentence-embedding model.

    Returns:
        Retriever object backed by an in-memory FAISS index.
    """
    embedder = HuggingFaceEmbeddings(model_name=embedding_model_name)
    vector_store = FAISS.from_texts(docs, embedder)
    return vector_store.as_retriever()
streamlit_app.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Main Streamlit application."""
2
+
3
+ import streamlit as st
4
+ from ui.sidebar import render_sidebar
5
+ from ui.chat import render_chat_interface
6
+
7
+
8
def initialize_session_state():
    """Seed session-state keys with their defaults on first run."""
    # Only missing keys are set, so reruns never clobber existing state.
    defaults = {
        "chat_history": [],
        "qa_chain": None,
        "document_processed": False,
    }
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value
16
+
17
+
18
def main():
    """Main application entry point."""
    # Page configuration must run before any other Streamlit call.
    st.set_page_config(page_title="DocsQA", page_icon="", layout="wide")

    initialize_session_state()

    # Page header
    st.title("DocsQA: Chat with Your Document")
    st.markdown("Upload a document and have a conversation about its contents! (Powered by Qwen)")

    # Sidebar (upload/controls) and the main chat panel.
    render_sidebar()
    render_chat_interface()


if __name__ == "__main__":
    main()
ui/chat.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Chat interface components."""
2
+
3
+ import streamlit as st
4
+
5
+
6
def render_chat_interface():
    """Render the main chat interface."""
    if st.session_state.document_processed:
        # Replay past turns, then wait for the next question.
        _display_chat_history()
        _handle_user_input()
    else:
        # No index yet — point the user at the sidebar workflow.
        st.info("<-- Please upload a document in the sidebar and click 'Process Document' to start chatting!")
17
+
18
+
19
def _display_chat_history():
    """Replay every stored message (plus any sources) into the chat area."""
    for entry in st.session_state.chat_history:
        with st.chat_message(entry["role"]):
            st.markdown(entry["content"])

            # Assistant turns may carry the retrieved passages they used.
            if entry["role"] == "assistant" and "sources" in entry:
                with st.expander("View Sources"):
                    for idx, passage in enumerate(entry["sources"], start=1):
                        st.markdown(f"**Source {idx}:** {passage}")
30
+
31
+
32
def _handle_user_input():
    """Capture a new user question and kick off a response."""
    prompt = st.chat_input("Ask a question about your document...")
    if not prompt:
        return

    # Record and echo the user's message before answering.
    st.session_state.chat_history.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    _generate_response(prompt)
47
+
48
+
49
def _generate_response(prompt):
    """Run the QA chain on *prompt* and render the assistant's reply."""
    with st.chat_message("assistant"), st.spinner("Thinking..."):
        # The whole render path sits inside the try so any failure —
        # in the chain call or in result handling — is shown to the user.
        try:
            result = st.session_state.qa_chain({"question": prompt})

            answer = result["answer"]
            # Raw text of every retrieved passage backing the answer.
            sources = [doc.page_content for doc in result.get("source_documents", [])]

            st.markdown(answer)

            if sources:
                with st.expander("View Sources"):
                    for idx, passage in enumerate(sources, start=1):
                        st.markdown(f"**Source {idx}:** {passage}")

            st.session_state.chat_history.append(
                {"role": "assistant", "content": answer, "sources": sources}
            )
        except Exception as exc:
            # Surface the failure and persist it so it survives reruns.
            error_msg = f"Sorry, I encountered an error: {str(exc)}"
            st.error(error_msg)
            st.session_state.chat_history.append(
                {"role": "assistant", "content": error_msg}
            )
ui/sidebar.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Sidebar UI components."""

import streamlit as st

from chains.qa_chain import create_qa_chain
from config import (
    EMBEDDING_MODEL_NAME,
    EXAMPLE_QUESTIONS,
    MAX_NEW_TOKENS,
    QWEN_MODEL_NAME,
    SAMPLE_TEXT,
    TEMPERATURE,
    TOP_P,
)
from models.llm_loader import load_qwen_llm
from models.retriever import build_retriever
# Fixed module path: the helpers added in this commit live in
# utils/doc_processor.py; importing utils.document_processor raised
# ModuleNotFoundError at app startup.
from utils.doc_processor import read_uploaded_file
10
+
11
+
12
def render_sidebar():
    """Render the sidebar: upload widget, example questions, and controls."""
    with st.sidebar:
        st.header("📄 Document Upload")

        # Let users grab a ready-made sample document to try the app.
        st.download_button(
            label="📄 Download Sample File",
            data=SAMPLE_TEXT,
            file_name="sample_agri.txt",
            mime="text/plain",
        )

        uploaded_file = st.file_uploader("Upload your file", type=["txt", "pdf"])
        if uploaded_file is not None:
            st.success(f"{uploaded_file.name}")
            _handle_document_upload(uploaded_file)

        # Extra controls appear only once they make sense.
        if st.session_state.document_processed:
            _render_example_questions()
        if st.session_state.chat_history:
            _render_clear_button()
42
+
43
+
44
def _handle_document_upload(uploaded_file):
    """Build the retrieval pipeline when the user clicks Process Document."""
    if not st.button("Process Document", type="primary"):
        return

    with st.spinner("Processing document..."):
        try:
            docs = read_uploaded_file(uploaded_file)

            if docs:
                # Assemble retriever -> LLM -> conversational chain.
                retriever = build_retriever(docs, EMBEDDING_MODEL_NAME)
                llm = load_qwen_llm(
                    QWEN_MODEL_NAME,
                    MAX_NEW_TOKENS,
                    TEMPERATURE,
                    TOP_P,
                )

                st.session_state.qa_chain = create_qa_chain(llm, retriever)
                st.session_state.document_processed = True
                st.session_state.chat_history = []  # fresh conversation per document

                st.success(f"Processed {len(docs)} text chunks!")
                st.rerun()
            else:
                st.error("No content found in file.")

        except Exception as e:
            st.error(f"Error: {str(e)}")
71
+
72
+
73
def _render_example_questions():
    """Offer one-click example questions below a divider."""
    st.markdown("---")
    st.subheader("💡 Example Questions")
    for question in EXAMPLE_QUESTIONS:
        # Unique key per question keeps Streamlit widget state distinct.
        if st.button(question, key=f"example_{question}"):
            st.session_state.user_input = question
            st.rerun()
81
+
82
+
83
def _render_clear_button():
    """Show a button that wipes the stored conversation."""
    st.markdown("---")
    if st.button("🗑️ Clear Chat History"):
        st.session_state.chat_history = []
        st.rerun()
utils/doc_processor.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Utilities for processing uploaded documents."""
2
+
3
+ import io
4
+
5
+ try:
6
+ from pypdf import PdfReader
7
+ except ImportError:
8
+ from PyPDF2 import PdfReader
9
+
10
+
11
def read_uploaded_file(uploaded_file):
    """Read an uploaded TXT or PDF file into a list of text chunks.

    Args:
        uploaded_file: Streamlit UploadedFile object.

    Returns:
        list: Text chunks extracted from the document.
    """
    # Rewind in case the stream was already consumed (e.g. on a rerun).
    uploaded_file.seek(0)

    # Dispatch on the browser-reported MIME type; anything that is not a
    # PDF is treated as plain UTF-8 text.
    if uploaded_file.type == "application/pdf":
        return process_pdf(uploaded_file)
    return process_text(uploaded_file)
28
+
29
+
30
def process_pdf(uploaded_file):
    """Extract text from a PDF file and split it into chunks.

    Args:
        uploaded_file: File-like object holding PDF bytes.

    Returns:
        list: Stripped, non-empty text chunks (one per line of text).
    """
    pdf_reader = PdfReader(io.BytesIO(uploaded_file.read()))
    # extract_text() can return None for pages with no extractable text
    # (e.g. scanned images); coerce those to "" instead of letting the
    # original `None + "\n"` concatenation raise TypeError.
    text = "\n".join(page.extract_text() or "" for page in pdf_reader.pages)
    return split_into_chunks(text)
37
+
38
+
39
def process_text(uploaded_file):
    """Decode an uploaded text file as UTF-8 and split it into chunks."""
    raw_bytes = uploaded_file.read()
    return split_into_chunks(raw_bytes.decode("utf-8"))
43
+
44
+
45
def split_into_chunks(text):
    """Split *text* on newlines, dropping blank or whitespace-only lines."""
    return [line.strip() for line in text.split("\n") if line.strip()]