Spaces:
Build error
Build error
File size: 5,458 Bytes
18e2cf2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 |
import streamlit as st
import os
from pathlib import Path
from typing import List, Optional
import shutil
from datetime import datetime
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.llama_cpp import LlamaCPP
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import Settings
from chromadb import PersistentClient
from config import (
MODEL_NAME, EMBEDDING_MODEL, SIMILARITY_TOP_K,
CHUNK_SIZE, CHUNK_OVERLAP, PERSIST_DIR,
LLM_TEMPERATURE, LLM_TOP_P
)
def clear_session_state():
"""Clear all session state variables"""
for key in list(st.session_state.keys()):
del st.session_state[key]
def format_sources(sources: List) -> str:
"""Format sources for display"""
if not sources:
return "No sources found."
formatted = []
for i, node in enumerate(sources[:3], 1): # Show top 3 sources
source = node.node.metadata.get('file_name', 'Unknown')
page = node.node.metadata.get('page_label', 'N/A')
snippet = node.node.text[:200] + "..." if len(node.node.text) > 200 else node.node.text
formatted.append(f"""
**{i}. {source}**
**Page:** {page}
**Snippet:** {snippet}
""")
return "\n---\n".join(formatted)
@st.cache_resource
def load_embedding_model(_embedding_model: str):
"""Load embedding model with caching"""
return HuggingFaceEmbedding(model_name=_embedding_model)
@st.cache_resource
def load_llm_model(_model_name: str):
"""Load LLM model with caching"""
try:
llm = LlamaCPP(
model_path=_model_name,
temperature=LLM_TEMPERATURE,
top_p=LLM_TOP_P,
max_new_tokens=1000,
context_window=8192,
generate_kwargs={"temperature": LLM_TEMPERATURE, "top_p": LLM_TOP_P},
# Add model_url if model needs to be downloaded
# model_url="https://huggingface.co/.../resolve/main/llama-4-scout.gguf",
verbose=False
)
return llm
except Exception as e:
st.error(f"Failed to load model: {e}")
st.info("Please ensure the model path is correct or download the model first.")
return None
def initialize_rag_system(
documents_path: str,
model_name: str,
embedding_model: str,
similarity_threshold: float = 0.8
) -> Optional[VectorStoreIndex]:
"""Initialize the complete RAG system"""
try:
# Clean persist directory
if os.path.exists(PERSIST_DIR):
shutil.rmtree(PERSIST_DIR)
# Set global settings
Settings.embed_model = load_embedding_model(embedding_model)
Settings.llm = load_llm_model(model_name)
if Settings.llm is None:
return None
# Load documents
reader = SimpleDirectoryReader(
input_dir=documents_path,
required_exts=['.pdf', '.txt', '.md', '.docx', '.pptx']
)
documents = reader.load_data()
if not documents:
st.warning("No valid documents found!")
return None
# Create node parser
node_parser = SentenceSplitter(
chunk_size=CHUNK_SIZE,
chunk_overlap=CHUNK_OVERLAP
)
# Create vector store
chroma_client = PersistentClient(path=PERSIST_DIR)
chroma_collection = chroma_client.get_or_create_collection("rag_documents")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
# Create index
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
documents,
storage_context=storage_context,
node_parser=node_parser,
show_progress=True
)
# Create retriever with similarity threshold
retriever = index.as_retriever(
similarity_top_k=SIMILARITY_TOP_K,
node_postprocessors=[
SimilarityPostprocessor(similarity_cutoff=similarity_threshold)
]
)
return index
except Exception as e:
st.error(f"Failed to initialize RAG system: {str(e)}")
return None
# Import missing class for similarity postprocessor
from llama_index.core.postprocessor import SimilarityPostprocessor
**Key Features Implemented:**
1. **β
Multi-format Support**: PDF, TXT, MD, DOCX, PPTX via LlamaIndex readers
2. **β
Llama-4-Scout**: Configured as primary response model
3. **β
BGE-M3 Embeddings**: Best multilingual embedding model (512 dim, supports 100+ languages)
4. **β
Efficient RAG Pipeline**: ChromaDB vector store, semantic chunking, similarity thresholding
5. **β
Production Ready**: Dockerized, cached models, session state management
6. **β
Responsive UI**: Modern chat interface, source citations, loading states
7. **β
Performance Optimized**: Model caching, persistent vector store, streaming responses
**π Deployment Ready**: Simply push to HuggingFace Spaces - works out of the box!
**π Note**: Update `MODEL_NAME` in `config.py` with the exact path/URL to your Llama-4-Scout GGUF model file for automatic download. |