ksimdeep committed · Commit 7e40afd · verified · 1 parent: 5dd5eb7

Create app.py

Files changed (1): app.py +251 -0
app.py ADDED
@@ -0,0 +1,251 @@
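"""Streamlit RAG chat app: upload TXT/CSV documents, retrieve relevant chunks
with a hybrid BM25 + dense-embedding retriever, and answer questions with a
4-bit-quantized Mistral-7B-Instruct model.

Assumed dependency set, inferred from the imports rather than pinned anywhere
in this commit: streamlit, torch, transformers, accelerate, bitsandbytes,
langchain, langchain-community, rank_bm25, faiss-cpu (or faiss-gpu),
sentence-transformers, unstructured.
"""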
import torch  # Add missing import
import streamlit as st
import os
import tempfile
from langchain_community.document_loaders import (
    TextLoader,
    CSVLoader,
    UnstructuredFileLoader
)
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.retrievers import BM25Retriever
from langchain_community.vectorstores import FAISS  # needed for the dense half of the hybrid retriever
from langchain.retrievers import EnsembleRetriever
from transformers import (
    AutoConfig,
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    pipeline
)

# Configuration
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.3"
EMBEDDING_MODEL = "thenlper/gte-large"
CHUNK_SIZE = 1024
CHUNK_OVERLAP = 128
MAX_NEW_TOKENS = 2048

# Initialize session state
if "messages" not in st.session_state:
    st.session_state.messages = []

@st.cache_resource
def initialize_model():
    # 4-bit NF4 quantization with double quantization keeps the 7B model on a single GPU
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True
    )

    # Load config first to modify RoPE params
    config = AutoConfig.from_pretrained(
        MODEL_NAME,
        trust_remote_code=True
    )

    # Fix RoPE scaling configuration; the truthiness check also guards against
    # rope_scaling being None, where the original hasattr() check would crash on .get()
    if getattr(config, "rope_scaling", None):
        config.rope_scaling = {
            "type": config.rope_scaling.get("rope_type", "linear"),
            "factor": config.rope_scaling.get("factor", 8.0)
        }

    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        config=config,
        quantization_config=quantization_config,
        device_map="auto",
        trust_remote_code=True
    )

    # The model is already dispatched by device_map="auto" above, so the
    # pipeline must not be given a device/device_map again
    return pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=MAX_NEW_TOKENS,
        temperature=0.1
    )

def process_uploaded_files(uploaded_files):
    documents = []
    with tempfile.TemporaryDirectory() as temp_dir:
        for file in uploaded_files:
            # Write the upload to disk so the path-based loaders can read it
            temp_path = os.path.join(temp_dir, file.name)
            with open(temp_path, "wb") as f:
                f.write(file.getbuffer())

            try:
                if file.name.endswith(".txt"):
                    loader = TextLoader(temp_path)
                elif file.name.endswith(".csv"):
                    loader = CSVLoader(temp_path)
                else:
                    # Fallback for any other type (requires the `unstructured` package);
                    # only reachable if the uploader's `type` list is widened
                    loader = UnstructuredFileLoader(temp_path)
                documents.extend(loader.load())
            except Exception as e:
                st.error(f"Error loading {file.name}: {str(e)}")

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=CHUNK_SIZE,
        chunk_overlap=CHUNK_OVERLAP,
        length_function=len
    )
    return text_splitter.split_documents(documents)

def create_retriever(documents):
    embeddings = HuggingFaceEmbeddings(
        model_name=EMBEDDING_MODEL,
        model_kwargs={'device': 'cuda'},
        encode_kwargs={'normalize_embeddings': True}
    )

    top_k = st.session_state.get("top_k", 5)

    # Sparse keyword retriever
    bm25_retriever = BM25Retriever.from_documents(documents)
    bm25_retriever.k = top_k

    # Dense embedding retriever. The original built `embeddings` but never used
    # them and ensembled a single retriever with weight 0.5; a FAISS vector
    # store (assumes faiss-cpu/faiss-gpu is installed) completes the intended
    # hybrid setup
    faiss_retriever = FAISS.from_documents(documents, embeddings).as_retriever(
        search_kwargs={"k": top_k}
    )

    return EnsembleRetriever(
        retrievers=[bm25_retriever, faiss_retriever],
        weights=[0.5, 0.5]
    )

def generate_response(query, retriever, generator):
    # invoke() replaces the deprecated get_relevant_documents() in recent LangChain
    docs = retriever.invoke(query)
    context = "\n\n".join(
        f"[Doc{i+1}] {doc.page_content}\nSource: {doc.metadata.get('source', 'unknown')}"
        for i, doc in enumerate(docs)
    )

    prompt = f"""<s>[INST] You are a precision-focused research assistant tasked with answering queries based solely on the provided context.

**Context:**
{context}

**Query:**
{query}

**Response Instructions:**
- Write a detailed, coherent, and insightful article that fully addresses the query based on the provided context.
- Adhere to the following principles:
  1. **Define the Core Subject**: Introduce and build the discussion logically around the main topic.
  2. **Establish Connections**: Highlight relationships between ideas and concepts with reasoning and examples.
  3. **Elaborate on Key Points**: Provide in-depth explanations and emphasize the significance of concepts.
  4. **Maintain Objectivity**: Use only the context provided, avoiding speculation or external knowledge.
  5. **Ensure Structure and Clarity**: Present information sequentially for a smooth narrative flow.
  6. **Engage with Content**: Explore implicit meanings, resolve doubts, and address counterpoints logically.
  7. **Provide Examples and Insights**: Use examples to clarify abstract ideas and offer actionable steps if applicable.
  8. **Logical Depth**: Draw inferences, explain purposes, and refute opposing ideas when necessary.
- Cite sources explicitly as [Doc1], [Doc2], etc.
- If uncertain, state: "I cannot determine from the provided context."

Craft the response as a seamless, thorough, and authoritative explanation that naturally integrates all aspects of the query. [/INST]"""

    response = generator(
        prompt,
        pad_token_id=generator.tokenizer.eos_token_id,
        do_sample=True
    )[0]['generated_text']

    # The pipeline echoes the prompt before the completion, so keep only the
    # text after the final [/INST] marker
    return response.split("[/INST]")[-1].strip(), docs

# Streamlit UI
st.title("📚 Document-Based QA Assistant")
st.markdown("Upload your documents and ask questions!")

# Sidebar controls
with st.sidebar:
    st.header("Configuration")
    uploaded_files = st.file_uploader(
        "Upload documents (TXT or CSV)",
        type=["txt", "csv"],
        accept_multiple_files=True
    )
    st.session_state.top_k = st.slider("Number of documents to retrieve", 3, 10, 5)
    st.markdown("---")
    st.markdown("Powered by Mistral-7B and LangChain")

# Main chat interface
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])
        if "sources" in message:
            with st.expander("View Sources"):
                for i, doc in enumerate(message["sources"]):
                    st.markdown(f"**Doc{i+1}** ({doc.metadata.get('source', 'unknown')})")
                    st.info(doc.page_content)

# Process documents
if uploaded_files and "retriever" not in st.session_state:
    with st.spinner("Processing documents..."):
        documents = process_uploaded_files(uploaded_files)
        st.session_state.retriever = create_retriever(documents)
        st.session_state.generator = initialize_model()

if prompt := st.chat_input("Ask a question about your documents"):
    # Add user message
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    # Generate response
    if "retriever" not in st.session_state:
        st.error("Please upload documents first!")
        st.stop()

    with st.spinner("Analyzing documents..."):
        try:
            response, sources = generate_response(
                prompt,
                st.session_state.retriever,
                st.session_state.generator
            )

            # Add assistant response
            st.session_state.messages.append({
                "role": "assistant",
                "content": response,
                "sources": sources
            })

            # Display response
            with st.chat_message("assistant"):
                st.markdown(response)
                with st.expander("View Document Sources"):
                    for i, doc in enumerate(sources):
                        st.markdown(f"**Doc{i+1}** ({doc.metadata.get('source', 'unknown')})")
                        st.info(doc.page_content)

        except Exception as e:
            st.error(f"Error generating response: {str(e)}")
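
# Launch with the standard Streamlit entry point:
#   streamlit run app.py
# Rough sizing assumption: the 4-bit NF4 Mistral-7B weights take on the order
# of 4-5 GB of VRAM, plus the gte-large embedder, which create_retriever also
# places on CUDA.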