GitHub Actions committed on
Commit
c9622da
·
0 Parent(s):

Deploy from GitHub Actions

Browse files
.gitignore ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+
9
+ # Virtual environments
10
+ .venv
11
+ venv/
12
+ env/
13
+
14
+ # IDE
15
+ .idea/
16
+ .vscode/
17
+ *.swp
18
+ *.swo
19
+
20
+ # Project specific
21
+ chroma_db/
22
+ data/
23
+ *.gguf
24
+ *.bin
25
+
26
+ # Cache
27
+ .pytest_cache/
28
+ .mypy_cache/
29
+ .ruff_cache/
30
+
31
+ # OS
32
+ .DS_Store
33
+ Thumbs.db
34
+
35
+ # Large model files (should not be in git)
36
+ models/
37
+ *.safetensors
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.11
README.md ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: FreeRAG
3
+ emoji: 🚀
4
+ colorFrom: blue
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 4.0.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ ---
12
+
13
+ # FreeRAG - Local RAG System
14
+
15
+ A modular Retrieval Augmented Generation (RAG) system powered by Phi-3.5-mini.
16
+
17
+ ## Features
18
+
19
+ - 📄 **Multi-format support**: PDF, DOCX, TXT, Markdown
20
+ - 🔍 **Semantic search**: ChromaDB vector store with sentence-transformers
21
+ - 🤖 **Local LLM**: Phi-3.5-mini running via llama-cpp
22
+ - 💬 **Interactive chat**: Ask questions about your documents
23
+ - 🎨 **Modern UI**: Clean Gradio interface
24
+
25
+ ## Usage
26
+
27
+ 1. Upload your documents using the file upload panel
28
+ 2. Wait for processing to complete
29
+ 3. Ask questions in the chat interface
30
+ 4. Get AI-powered answers with source citations
31
+
32
+ ## Tech Stack
33
+
34
+ - **LLM**: Phi-3.5-mini (GGUF via llama-cpp-python)
35
+ - **Embeddings**: sentence-transformers (all-MiniLM-L6-v2)
36
+ - **Vector Store**: ChromaDB
37
+ - **UI**: Gradio
app.py ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Gradio web interface for FreeRAG - designed for HuggingFace Spaces."""
2
+
3
+ import gradio as gr
4
+ from pathlib import Path
5
+ import tempfile
6
+ import os
7
+
8
+ from src.config import Config
9
+ from src.rag.pipeline import RAGPipeline
10
+
11
+
12
# Module-level singleton so the model, embeddings, and vector store are
# constructed once per process rather than once per request.
# Fix: annotation must admit None (the initial value); pyproject requires
# Python >= 3.11, so the `X | None` union syntax is available.
pipeline: RAGPipeline | None = None


def get_pipeline() -> RAGPipeline:
    """Return the process-wide RAG pipeline, creating it on first use.

    Lazy construction keeps app startup fast: the pipeline's heavy
    components are only built when the first request needs them.

    Returns:
        The shared RAGPipeline instance.
    """
    global pipeline
    if pipeline is None:
        pipeline = RAGPipeline(Config.default())
    return pipeline
22
+
23
+
24
def process_files(files):
    """Process uploaded files and add their chunks to the vector store.

    Robustness fix: a single unreadable file no longer aborts the whole
    batch — failures are collected and reported alongside successes.

    Args:
        files: Items from the Gradio upload component (objects with a
            ``.name`` path attribute, or plain path strings).

    Returns:
        Tuple of (status message, refreshed stats text).
    """
    if not files:
        return "Please upload at least one file.", get_stats_text()

    pipe = get_pipeline()
    total_chunks = 0
    processed_files = []
    failures = []

    for file in files:
        # Gradio may hand us a tempfile wrapper or a plain path string.
        file_path = file.name if hasattr(file, 'name') else file
        try:
            count = pipe.ingest_file(file_path)
        except Exception as e:
            failures.append(f"{Path(file_path).name}: {e}")
            continue
        total_chunks += count
        processed_files.append(Path(file_path).name)

    lines = []
    if processed_files:
        lines.append(f"✅ Successfully processed {len(processed_files)} file(s)!")
        lines.append(f"📄 Files: {', '.join(processed_files)}")
        lines.append(f"📊 Added {total_chunks} chunks to the knowledge base.")
    if failures:
        lines.append("⚠️ Error processing file(s): " + "; ".join(failures))

    return "\n".join(lines), get_stats_text()
49
+
50
+
51
def answer_question(question, top_k, chat_history):
    """Run a RAG query and append the (question, answer) pair to the chat.

    Args:
        question: User's question text.
        top_k: Number of chunks to retrieve (slider value, may be float).
        chat_history: Gradio chat history list of (user, assistant) tuples.

    Returns:
        Tuple of (updated chat history, "" to clear the input box).
    """
    if not question.strip():
        return chat_history, ""

    pipe = get_pipeline()

    if pipe.vector_store.get_count() == 0:
        reply = "⚠️ No documents have been uploaded yet. Please upload some documents first."
    else:
        try:
            result = pipe.query(question, top_k=int(top_k))
            reply = result["answer"]
            # Append a source-citation footer when retrieval found anything.
            if result["sources"]:
                names = [s["filename"] for s in result["sources"]]
                reply += f"\n\n---\n📚 *Sources: {', '.join(names)}*"
        except Exception as e:
            reply = f"❌ Error: {e}"

    chat_history.append((question, reply))
    return chat_history, ""
74
+
75
+
76
def get_stats_text() -> str:
    """Render knowledge-base statistics as a short multi-line string."""
    stats = get_pipeline().get_stats()
    parts = (
        f"📊 Documents: {stats['documents_count']} chunks",
        "🤖 Model: Phi-3.5-mini",
        f"📐 Embeddings: {stats['embedding_model']}",
    )
    return "\n".join(parts)
85
+
86
+
87
def clear_knowledge_base():
    """Remove every document from the vector store.

    Returns:
        Tuple of (status message, refreshed stats text) for the UI.
    """
    get_pipeline().vector_store.clear()
    return "🗑️ Knowledge base cleared.", get_stats_text()
92
+
93
+
94
# Custom CSS for modern dark theme
custom_css = """
.gradio-container {
    max-width: 1200px !important;
}
.chat-message {
    padding: 12px;
    border-radius: 8px;
    margin: 8px 0;
}
footer {
    display: none !important;
}
"""

# Build Gradio interface. Components are laid out top-to-bottom inside this
# `with` block; event wiring happens at the end, once all components exist.
with gr.Blocks(
    title="FreeRAG - Local RAG System",
    theme=gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="slate"
    ),
    css=custom_css
) as demo:

    # Page header.
    gr.Markdown("""
    # 🚀 FreeRAG
    ### Local RAG System powered by Phi-3.5-mini

    Upload your documents and ask questions! Everything runs locally with no data leaving your machine.
    """)

    with gr.Row():
        # Left column - Document Upload
        with gr.Column(scale=1):
            gr.Markdown("### 📁 Upload Documents")

            # Limit uploads to the formats the document loader can parse.
            file_upload = gr.File(
                label="Upload files (PDF, DOCX, TXT, MD)",
                file_count="multiple",
                file_types=[".pdf", ".docx", ".txt", ".md"]
            )

            upload_btn = gr.Button("📤 Process Documents", variant="primary")
            upload_status = gr.Textbox(label="Status", lines=3, interactive=False)

            gr.Markdown("### 📊 Knowledge Base Stats")
            # NOTE(review): `value` is the get_stats_text function itself (not
            # its result) combined with `every=5` — presumably Gradio
            # re-invokes it on that interval; confirm against the installed
            # gradio version's Textbox documentation.
            stats_display = gr.Textbox(
                label="",
                value=get_stats_text,
                lines=3,
                interactive=False,
                every=5  # Refresh every 5 seconds
            )

            clear_btn = gr.Button("🗑️ Clear Knowledge Base", variant="secondary")

        # Right column - Chat Interface
        with gr.Column(scale=2):
            gr.Markdown("### 💬 Ask Questions")

            chatbot = gr.Chatbot(
                label="Conversation",
                height=400,
                show_copy_button=True
            )

            with gr.Row():
                question_input = gr.Textbox(
                    label="Your Question",
                    placeholder="Ask anything about your documents...",
                    scale=4,
                    show_label=False
                )
                # Number of chunks retrieved per query (top_k for RAG search).
                top_k_slider = gr.Slider(
                    minimum=1,
                    maximum=10,
                    value=3,
                    step=1,
                    label="Sources",
                    scale=1
                )

            with gr.Row():
                submit_btn = gr.Button("🔍 Ask", variant="primary", scale=2)
                clear_chat_btn = gr.Button("🧹 Clear Chat", scale=1)

    # Event handlers
    upload_btn.click(
        fn=process_files,
        inputs=[file_upload],
        outputs=[upload_status, stats_display]
    )

    # Both the Ask button and pressing Enter in the textbox submit a question;
    # answer_question returns (history, "") so the input box is cleared.
    submit_btn.click(
        fn=answer_question,
        inputs=[question_input, top_k_slider, chatbot],
        outputs=[chatbot, question_input]
    )

    question_input.submit(
        fn=answer_question,
        inputs=[question_input, top_k_slider, chatbot],
        outputs=[chatbot, question_input]
    )

    clear_btn.click(
        fn=clear_knowledge_base,
        outputs=[upload_status, stats_display]
    )

    # Clearing the chat only resets UI state: replace the history with [].
    clear_chat_btn.click(
        fn=lambda: [],
        outputs=[chatbot]
    )

    # Footer.
    gr.Markdown("""
    ---
    <center>
    <p style="color: gray;">
    Built with 💙 using Phi-3.5-mini, ChromaDB, and Gradio |
    <a href="https://github.com/yourusername/FreeRAG">GitHub</a>
    </p>
    </center>
    """)


if __name__ == "__main__":
    demo.launch()
main.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """FreeRAG - A modular RAG system using Phi-3.5-mini.
2
+
3
+ CLI entrypoint for ingesting documents and querying the RAG system.
4
+ """
5
+
6
+ import typer
7
+ from rich.console import Console
8
+ from rich.panel import Panel
9
+ from rich.markdown import Markdown
10
+ from pathlib import Path
11
+
12
+ from src.config import Config
13
+ from src.rag.pipeline import RAGPipeline
14
+
15
+ app = typer.Typer(help="FreeRAG - Local RAG system with Phi-3.5-mini")
16
+ console = Console()
17
+
18
+
19
def get_pipeline() -> RAGPipeline:
    """Build a RAG pipeline from the default configuration."""
    config = Config.default()
    return RAGPipeline(config)
22
+
23
+
24
@app.command()
def ingest(
    path: str = typer.Argument(..., help="Path to file or directory to ingest"),
    recursive: bool = typer.Option(True, "--recursive/--no-recursive", "-r", help="Recursively search directories")
):
    """Ingest documents into the vector store."""
    pipeline = get_pipeline()
    target = Path(path)

    if not target.exists():
        console.print(f"[red]Error: Path not found: {path}[/red]")
        raise typer.Exit(1)

    with console.status("[bold green]Ingesting documents..."):
        # A file is ingested directly; a directory is walked.
        count = (
            pipeline.ingest_file(path)
            if target.is_file()
            else pipeline.ingest_directory(path, recursive=recursive)
        )

    console.print(Panel(f"[green]Successfully ingested {count} chunks![/green]"))
44
+
45
+
46
@app.command()
def query(
    question: str = typer.Argument(..., help="Question to ask"),
    top_k: int = typer.Option(3, "--top-k", "-k", help="Number of documents to retrieve")
):
    """Query the RAG system."""
    pipeline = get_pipeline()

    # Warn (but still run) when the store is empty — the LLM can answer
    # without context, just without citations.
    if pipeline.vector_store.get_count() == 0:
        console.print("[yellow]Warning: No documents in vector store. Run 'ingest' first.[/yellow]")

    with console.status("[bold green]Thinking..."):
        result = pipeline.query(question, top_k=top_k)

    # Display answer
    console.print(Panel(Markdown(result["answer"]), title="[bold blue]Answer[/bold blue]"))

    # Display sources
    sources = result["sources"]
    if sources:
        console.print("\n[dim]Sources:[/dim]")
        for entry in sources:
            console.print(f" • {entry['filename']}")
68
+
69
+
70
@app.command()
def chat():
    """Interactive chat mode."""
    pipeline = get_pipeline()

    console.print(Panel(
        "[bold]FreeRAG Chat Mode[/bold]\n"
        "Type your questions and press Enter.\n"
        "Type 'exit' or 'quit' to stop.",
        title="🤖 FreeRAG"
    ))

    console.print(f"[dim]Loaded {pipeline.vector_store.get_count()} document chunks.[/dim]\n")

    # Read-eval-print loop; Ctrl-C exits cleanly at any point.
    while True:
        try:
            user_text = console.input("[bold blue]You:[/bold blue] ")

            if user_text.lower() in ("exit", "quit", "q"):
                console.print("[dim]Goodbye![/dim]")
                break

            if not user_text.strip():
                continue

            with console.status("[bold green]Thinking..."):
                reply = pipeline.chat(user_text)

            console.print(f"[bold green]Assistant:[/bold green] {reply}\n")

        except KeyboardInterrupt:
            console.print("\n[dim]Goodbye![/dim]")
            break
104
+
105
+
106
@app.command()
def stats():
    """Show vector store statistics."""
    # Local renamed from `stats` to avoid shadowing this command's name.
    info = get_pipeline().get_stats()

    body = (
        f"📊 [bold]Documents:[/bold] {info['documents_count']} chunks\n"
        f"🗃️ [bold]Collection:[/bold] {info['collection_name']}\n"
        f"🤖 [bold]LLM:[/bold] {info['model']}\n"
        f"📐 [bold]Embeddings:[/bold] {info['embedding_model']}"
    )
    console.print(Panel(body, title="FreeRAG Statistics"))
119
+
120
+
121
@app.command()
def clear():
    """Clear the vector store."""
    # Guard clause: bail out unless the user explicitly confirms.
    if not typer.confirm("Are you sure you want to clear all documents?"):
        return
    get_pipeline().vector_store.clear()
    console.print("[green]Vector store cleared.[/green]")
128
+
129
+
130
+ if __name__ == "__main__":
131
+ app()
pyproject.toml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "freerag"
3
+ version = "0.1.0"
4
description = "Local RAG system powered by Phi-3.5-mini (ChromaDB + llama-cpp + Gradio)"
5
+ readme = "README.md"
6
+ requires-python = ">=3.11"
7
+ dependencies = [
8
+ "chromadb>=0.4.22",
9
+ "gradio>=4.0.0",
10
+ "huggingface-hub>=0.20.0",
11
+ "ipykernel>=7.1.0",
12
+ "llama-cpp-python>=0.2.50",
13
+ "pypdf>=3.17.0",
14
+ "python-docx>=1.1.0",
15
+ "rich>=13.7.0",
16
+ "sentence-transformers>=2.2.2",
17
+ "typer>=0.9.0",
18
+ ]
requirements.txt ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Core Dependencies
2
+ huggingface_hub>=0.20.0
3
+ llama-cpp-python>=0.2.50
4
+
5
+ # Embeddings
6
+ sentence-transformers>=2.2.2
7
+
8
+ # Vector Store
9
+ chromadb>=0.4.22
10
+
11
+ # Document Loaders
12
+ pypdf>=3.17.0
13
+ python-docx>=1.1.0
14
+
15
+ # CLI & Utils
16
+ rich>=13.7.0
17
+ typer>=0.9.0
18
+
19
+ # Web UI (for HuggingFace Spaces)
20
+ gradio>=4.0.0
21
+
22
+ # Dev
23
+ ipykernel
src/__init__.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ """FreeRAG - A modular RAG system using Phi-3.5-mini."""
2
+
3
+ from src.config import Config
4
+
5
+ __version__ = "0.1.0"
6
+ __all__ = ["Config"]
src/config.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Configuration settings for FreeRAG."""
2
+
3
+ from dataclasses import dataclass, field
4
+ from pathlib import Path
5
+
6
+
7
@dataclass
class ModelConfig:
    """LLM model configuration."""
    # HuggingFace repo hosting the GGUF weights (passed to hf_hub_download).
    repo_id: str = "bartowski/Phi-3.5-mini-instruct-GGUF"
    # Quantized (Q4_K_M) weights file within that repo.
    filename: str = "Phi-3.5-mini-instruct-Q4_K_M.gguf"
    # Context window size in tokens (passed to llama-cpp's Llama).
    n_ctx: int = 4096
    # Threads used by llama-cpp for inference.
    n_threads: int = 4
    # Default cap on tokens generated per completion.
    max_tokens: int = 512
    # Sampling temperature; higher values give more varied output.
    temperature: float = 0.7
    # Whether llama-cpp prints its loading/debug output.
    verbose: bool = False
17
+
18
+
19
@dataclass
class EmbeddingConfig:
    """Embedding model configuration."""
    # sentence-transformers model id used to embed documents and queries.
    model_name: str = "all-MiniLM-L6-v2"
    # Device passed to SentenceTransformer ("cpu", or e.g. "cuda").
    device: str = "cpu"
24
+
25
+
26
@dataclass
class VectorStoreConfig:
    """Vector store configuration."""
    # Name of the collection documents are stored under.
    collection_name: str = "freerag_documents"
    # On-disk persistence location (created by Config.ensure_directories()).
    persist_directory: str = "./chroma_db"
    # Default number of chunks retrieved per query.
    top_k: int = 3
32
+
33
+
34
@dataclass
class ChunkingConfig:
    """Text chunking configuration."""
    # Target chunk length in characters; chunks may end earlier when the
    # splitter finds a sentence/paragraph boundary inside the window.
    chunk_size: int = 500
    # Characters of context carried over between consecutive chunks.
    chunk_overlap: int = 50
39
+
40
+
41
@dataclass
class Config:
    """Top-level configuration aggregating all component configs."""
    model: ModelConfig = field(default_factory=ModelConfig)
    embedding: EmbeddingConfig = field(default_factory=EmbeddingConfig)
    vectorstore: VectorStoreConfig = field(default_factory=VectorStoreConfig)
    chunking: ChunkingConfig = field(default_factory=ChunkingConfig)
    data_directory: str = "./data"

    @classmethod
    def default(cls) -> "Config":
        """Build a configuration with every field at its default."""
        return cls()

    def ensure_directories(self) -> None:
        """Create the data and vector-store directories if missing."""
        for directory in (self.data_directory, self.vectorstore.persist_directory):
            Path(directory).mkdir(parents=True, exist_ok=True)
src/document_loader/__init__.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ """Document loader module for FreeRAG."""
2
+
3
+ from src.document_loader.loader import DocumentLoader, Document
4
+ from src.document_loader.splitter import TextSplitter
5
+
6
+ __all__ = ["DocumentLoader", "Document", "TextSplitter"]
src/document_loader/loader.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Document loader for various file formats."""
2
+
3
+ from dataclasses import dataclass, field
4
+ from pathlib import Path
5
+ from typing import List, Optional, Dict, Any
6
+
7
+
8
@dataclass
class Document:
    """A loaded document: raw text plus provenance metadata."""
    content: str
    # Expected keys (set by DocumentLoader): source, filename, extension.
    metadata: Dict[str, Any] = field(default_factory=dict)

    @property
    def source(self) -> str:
        """Absolute path the document was loaded from, or "unknown"."""
        return self.metadata.get("source", "unknown")


class DocumentLoader:
    """Load documents from the supported file formats.

    Stateless: the previous unused ``_pdf_loader``/``_docx_loader``
    attributes were dead state and have been removed.
    """

    SUPPORTED_EXTENSIONS = {".txt", ".md", ".pdf", ".docx"}

    def load_file(self, file_path: str) -> Document:
        """Load a single file.

        Args:
            file_path: Path to the file.

        Returns:
            Loaded document with source/filename/extension metadata.

        Raises:
            ValueError: If file format is not supported.
            FileNotFoundError: If file doesn't exist.
        """
        path = Path(file_path)

        if not path.exists():
            raise FileNotFoundError(f"File not found: {file_path}")

        extension = path.suffix.lower()

        if extension not in self.SUPPORTED_EXTENSIONS:
            raise ValueError(
                f"Unsupported file format: {extension}. "
                f"Supported: {self.SUPPORTED_EXTENSIONS}"
            )

        content = self._load_by_extension(path, extension)

        return Document(
            content=content,
            metadata={
                "source": str(path.absolute()),
                "filename": path.name,
                "extension": extension
            }
        )

    def load_directory(
        self,
        directory_path: str,
        recursive: bool = True
    ) -> List[Document]:
        """Load all supported files from a directory.

        Args:
            directory_path: Path to the directory.
            recursive: Whether to search recursively.

        Returns:
            List of loaded documents.

        Raises:
            FileNotFoundError: If the directory doesn't exist.
            ValueError: If the path is not a directory.
        """
        path = Path(directory_path)

        if not path.exists():
            raise FileNotFoundError(f"Directory not found: {directory_path}")

        if not path.is_dir():
            raise ValueError(f"Not a directory: {directory_path}")

        documents = []
        pattern = "**/*" if recursive else "*"

        for file_path in path.glob(pattern):
            if file_path.is_file() and file_path.suffix.lower() in self.SUPPORTED_EXTENSIONS:
                # Best-effort: skip files that fail to parse, keep the rest.
                try:
                    doc = self.load_file(str(file_path))
                    documents.append(doc)
                    print(f"Loaded: {file_path.name}")
                except Exception as e:
                    print(f"Warning: Failed to load {file_path.name}: {e}")

        return documents

    def _load_by_extension(self, path: Path, extension: str) -> str:
        """Dispatch to the format-specific loader for *extension*."""
        if extension in {".txt", ".md"}:
            return self._load_text(path)
        elif extension == ".pdf":
            return self._load_pdf(path)
        elif extension == ".docx":
            return self._load_docx(path)
        else:
            raise ValueError(f"Unknown extension: {extension}")

    def _load_text(self, path: Path) -> str:
        """Load plain text / markdown, assuming UTF-8."""
        return path.read_text(encoding="utf-8")

    def _load_pdf(self, path: Path) -> str:
        """Extract text from a PDF, one page at a time."""
        try:
            from pypdf import PdfReader
        except ImportError as e:
            # Chain the cause so the original import failure is visible.
            raise ImportError("pypdf is required for PDF files: pip install pypdf") from e

        reader = PdfReader(str(path))
        # Some pages yield no text (e.g. scanned images); skip those.
        text_parts = [t for t in (page.extract_text() for page in reader.pages) if t]
        return "\n\n".join(text_parts)

    def _load_docx(self, path: Path) -> str:
        """Extract non-empty paragraphs from a DOCX file."""
        try:
            from docx import Document as DocxDocument
        except ImportError as e:
            raise ImportError("python-docx is required for DOCX files: pip install python-docx") from e

        doc = DocxDocument(str(path))
        paragraphs = [p.text for p in doc.paragraphs if p.text.strip()]
        return "\n\n".join(paragraphs)
src/document_loader/splitter.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Text splitter for chunking documents."""
2
+
3
+ from dataclasses import dataclass
4
+ from typing import List, Optional
5
+
6
+ from src.config import ChunkingConfig
7
+ from src.document_loader.loader import Document
8
+
9
+
10
@dataclass
class TextChunk:
    """A chunk of a document's text."""
    content: str       # stripped chunk text
    metadata: dict     # document metadata plus chunk_index/start_char/end_char
    chunk_index: int   # 0-based position of the chunk within its document


class TextSplitter:
    """Split text into overlapping chunks, preferring sentence boundaries."""

    # Boundary candidates, tried in order of preference.
    _SEPARATORS = ("\n\n", "\n", ". ", "! ", "? ")

    def __init__(self, config: Optional[ChunkingConfig] = None):
        """Initialize the text splitter.

        Args:
            config: Chunking configuration. Uses defaults if not provided.
        """
        self.config = config or ChunkingConfig()

    def split_text(self, text: str, metadata: Optional[dict] = None) -> List[TextChunk]:
        """Split text into chunks.

        Fixes over the previous version:
        - The progress guard was a hard-to-read conditional expression
          (`if A if chunks else 0:`) that compared against the *last
          chunk's* start; when no chunk had been emitted yet and
          chunk_overlap >= the window advance, the loop never made
          progress (infinite loop). The guard now simply requires the
          next start to move past the current one.

        Args:
            text: Text to split.
            metadata: Optional metadata to attach to chunks.

        Returns:
            List of text chunks (empty for blank input).
        """
        if not text.strip():
            return []

        metadata = metadata or {}
        chunks: List[TextChunk] = []
        text = text.replace("\r\n", "\n")

        start = 0
        chunk_index = 0

        while start < len(text):
            end = start + self.config.chunk_size

            if end < len(text):
                # Prefer to cut at a natural boundary inside the window;
                # fall back to a hard cut at chunk_size if none is found.
                for sep in self._SEPARATORS:
                    boundary = text.rfind(sep, start, end)
                    if boundary > start:
                        end = boundary + len(sep)
                        break
            else:
                end = len(text)

            chunk_text = text[start:end].strip()

            if chunk_text:
                chunks.append(TextChunk(
                    content=chunk_text,
                    metadata={
                        **metadata,
                        "chunk_index": chunk_index,
                        "start_char": start,
                        "end_char": end
                    },
                    chunk_index=chunk_index
                ))
                chunk_index += 1

            # Step forward, keeping chunk_overlap characters of context.
            next_start = end - self.config.chunk_overlap
            if next_start <= start:
                # No forward progress (overlap >= window advance):
                # jump past the window instead of looping forever.
                next_start = end
            start = next_start

        return chunks

    def split_documents(self, documents: List[Document]) -> List[TextChunk]:
        """Split multiple documents into chunks.

        Args:
            documents: List of documents to split.

        Returns:
            List of text chunks from all documents, in document order.
        """
        all_chunks: List[TextChunk] = []
        for doc in documents:
            all_chunks.extend(self.split_text(doc.content, doc.metadata))
        return all_chunks
src/embeddings/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ """Embeddings module for FreeRAG."""
2
+
3
+ from src.embeddings.sentence_embeddings import EmbeddingModel
4
+
5
+ __all__ = ["EmbeddingModel"]
src/embeddings/sentence_embeddings.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Sentence embeddings using sentence-transformers."""
2
+
3
+ import os
4
+ # Disable TensorFlow to avoid import conflicts with transformers
5
+ os.environ.setdefault("USE_TF", "0")
6
+ os.environ.setdefault("TRANSFORMERS_NO_TF", "1")
7
+
8
+ from typing import List, Optional
9
+ from sentence_transformers import SentenceTransformer
10
+ import numpy as np
11
+
12
+ from src.config import EmbeddingConfig
13
+
14
+
15
class EmbeddingModel:
    """Embedding model wrapper using sentence-transformers."""

    def __init__(self, config: Optional[EmbeddingConfig] = None):
        """Initialize the embedding model.

        Args:
            config: Embedding configuration. Uses defaults if not provided.
        """
        self.config = config or EmbeddingConfig()
        self._model: Optional[SentenceTransformer] = None

    @property
    def model(self) -> SentenceTransformer:
        """Load the sentence-transformers model on first access."""
        if self._model is None:
            print(f"Loading embedding model: {self.config.model_name}...")
            self._model = SentenceTransformer(
                self.config.model_name,
                device=self.config.device
            )
            print("Embedding model loaded!")
        return self._model

    @property
    def dimension(self) -> int:
        """Dimensionality of the embedding vectors."""
        return self.model.get_sentence_embedding_dimension()

    def embed_text(self, text: str) -> List[float]:
        """Embed a single text.

        Args:
            text: Text to embed.

        Returns:
            Embedding vector as a list of floats.
        """
        return self.model.encode(text, convert_to_numpy=True).tolist()

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed a batch of texts.

        Args:
            texts: List of texts to embed.

        Returns:
            One embedding vector per input text.
        """
        return self.model.encode(texts, convert_to_numpy=True).tolist()

    def __call__(self, texts: List[str]) -> List[List[float]]:
        """Alias for embed_documents, matching ChromaDB's callable interface."""
        return self.embed_documents(texts)
src/llm/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ """LLM module for FreeRAG."""
2
+
3
+ from src.llm.phi_model import PhiModel
4
+
5
+ __all__ = ["PhiModel"]
src/llm/phi_model.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Phi-3.5-mini model wrapper using llama-cpp-python."""
2
+
3
+ from typing import Optional, List, Dict, Any
4
+ from huggingface_hub import hf_hub_download
5
+ from llama_cpp import Llama
6
+
7
+ from src.config import ModelConfig
8
+
9
+
10
class PhiModel:
    """Wrapper for Phi-3.5-mini model."""

    def __init__(self, config: Optional[ModelConfig] = None):
        """Initialize the model wrapper.

        Args:
            config: Model configuration. Uses defaults if not provided.
        """
        self.config = config or ModelConfig()
        self._model: Optional[Llama] = None
        self._model_path: Optional[str] = None

    @property
    def model(self) -> Llama:
        """Download and load the model on first access, then cache it."""
        if self._model is None:
            self._load_model()
        return self._model

    def _load_model(self) -> None:
        """Fetch the GGUF weights from HuggingFace and initialize llama-cpp."""
        print(f"Downloading model from {self.config.repo_id}...")
        self._model_path = hf_hub_download(
            repo_id=self.config.repo_id,
            filename=self.config.filename
        )

        print("Loading model into memory...")
        self._model = Llama(
            model_path=self._model_path,
            n_ctx=self.config.n_ctx,
            n_threads=self.config.n_threads,
            verbose=self.config.verbose
        )
        print("Model loaded successfully!")

    def generate(self, prompt: str, max_tokens: Optional[int] = None) -> str:
        """Generate a plain text completion.

        Args:
            prompt: Input prompt.
            max_tokens: Maximum tokens to generate (config default if falsy).

        Returns:
            Generated text, stripped.
        """
        result = self.model(
            prompt,
            max_tokens=max_tokens or self.config.max_tokens,
            temperature=self.config.temperature,
            echo=False
        )
        return result["choices"][0]["text"].strip()

    def chat(
        self,
        messages: List[Dict[str, str]],
        max_tokens: Optional[int] = None
    ) -> str:
        """Generate a chat completion.

        Args:
            messages: List of message dicts with 'role' and 'content'.
            max_tokens: Maximum tokens to generate (config default if falsy).

        Returns:
            The assistant's reply, stripped.
        """
        result = self.model.create_chat_completion(
            messages=messages,
            max_tokens=max_tokens or self.config.max_tokens,
            temperature=self.config.temperature
        )
        return result["choices"][0]["message"]["content"].strip()

    def chat_with_context(
        self,
        query: str,
        context: str,
        system_prompt: Optional[str] = None
    ) -> str:
        """Answer *query* grounded in retrieved *context* (RAG style).

        Args:
            query: User's question.
            context: Retrieved context from documents.
            system_prompt: Optional system prompt override.

        Returns:
            Generated response.
        """
        prompt = system_prompt
        if prompt is None:
            prompt = (
                "You are a helpful assistant. Answer the user's question based on "
                "the provided context. If the context doesn't contain relevant "
                "information, say so honestly. Be concise and accurate."
            )

        user_message = f"""Context:
{context}

Question: {query}

Please answer based on the context provided above."""

        return self.chat([
            {"role": "system", "content": prompt},
            {"role": "user", "content": user_message}
        ])
src/rag/__init__.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ """RAG pipeline module for FreeRAG."""
2
+
3
+ from src.rag.retriever import Retriever
4
+ from src.rag.pipeline import RAGPipeline
5
+
6
+ __all__ = ["Retriever", "RAGPipeline"]
src/rag/pipeline.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Main RAG pipeline orchestrating all components."""
2
+
3
+ from typing import Optional, Dict, Any
4
+
5
+ from src.config import Config
6
+ from src.llm.phi_model import PhiModel
7
+ from src.embeddings.sentence_embeddings import EmbeddingModel
8
+ from src.document_loader.loader import DocumentLoader
9
+ from src.document_loader.splitter import TextSplitter
10
+ from src.vectorstore.chroma_store import VectorStore
11
+ from src.rag.retriever import Retriever
12
+
13
+
14
class RAGPipeline:
    """Main RAG pipeline combining ingestion, retrieval, and generation.

    Components (LLM, embeddings, vector store, loader, splitter) are created
    lazily on first access so constructing the pipeline stays cheap and model
    weights are only loaded when actually needed.
    """

    def __init__(self, config: Optional["Config"] = None):
        """Initialize the RAG pipeline.

        Args:
            config: Configuration. Uses defaults if not provided.
        """
        self.config = config or Config.default()
        self.config.ensure_directories()

        # Backing fields for the lazy component properties below.
        self._llm: Optional["PhiModel"] = None
        self._embedding_model: Optional["EmbeddingModel"] = None
        self._vector_store: Optional["VectorStore"] = None
        self._retriever: Optional["Retriever"] = None
        self._document_loader: Optional["DocumentLoader"] = None
        self._text_splitter: Optional["TextSplitter"] = None

    @property
    def llm(self) -> "PhiModel":
        """Get the LLM instance, creating it on first access."""
        if self._llm is None:
            self._llm = PhiModel(self.config.model)
        return self._llm

    @property
    def embedding_model(self) -> "EmbeddingModel":
        """Get the embedding model instance, creating it on first access."""
        if self._embedding_model is None:
            self._embedding_model = EmbeddingModel(self.config.embedding)
        return self._embedding_model

    @property
    def vector_store(self) -> "VectorStore":
        """Get the vector store instance, creating it on first access."""
        if self._vector_store is None:
            self._vector_store = VectorStore(
                self.config.vectorstore,
                self.embedding_model
            )
        return self._vector_store

    @property
    def retriever(self) -> "Retriever":
        """Get the retriever instance, creating it on first access."""
        if self._retriever is None:
            self._retriever = Retriever(
                self.vector_store,
                top_k=self.config.vectorstore.top_k
            )
        return self._retriever

    @property
    def document_loader(self) -> "DocumentLoader":
        """Get the document loader instance, creating it on first access."""
        if self._document_loader is None:
            self._document_loader = DocumentLoader()
        return self._document_loader

    @property
    def text_splitter(self) -> "TextSplitter":
        """Get the text splitter instance, creating it on first access."""
        if self._text_splitter is None:
            self._text_splitter = TextSplitter(self.config.chunking)
        return self._text_splitter

    def ingest_file(self, file_path: str) -> int:
        """Ingest a single file into the vector store.

        Args:
            file_path: Path to the file.

        Returns:
            Number of chunks added.
        """
        print(f"Loading file: {file_path}")
        document = self.document_loader.load_file(file_path)

        print("Splitting into chunks...")
        chunks = self.text_splitter.split_text(document.content, document.metadata)

        print(f"Adding {len(chunks)} chunks to vector store...")
        return self.vector_store.add_chunks(chunks)

    def ingest_directory(self, directory_path: str, recursive: bool = True) -> int:
        """Ingest all files from a directory.

        Args:
            directory_path: Path to the directory.
            recursive: Whether to search recursively.

        Returns:
            Total number of chunks added.
        """
        print(f"Loading documents from: {directory_path}")
        documents = self.document_loader.load_directory(directory_path, recursive)

        if not documents:
            print("No documents found.")
            return 0

        print(f"Loaded {len(documents)} documents. Splitting into chunks...")
        chunks = self.text_splitter.split_documents(documents)

        print(f"Adding {len(chunks)} chunks to vector store...")
        return self.vector_store.add_chunks(chunks)

    def query(self, question: str, top_k: Optional[int] = None) -> Dict[str, Any]:
        """Query the RAG system.

        Args:
            question: User's question.
            top_k: Number of documents to retrieve.

        Returns:
            Dict with the question, the generated answer, the context string
            fed to the LLM, and a list of source descriptors.
        """
        # Retrieve exactly once. The previous implementation called the
        # retriever twice (retrieve_text + retrieve), which embedded the query
        # and searched the vector store twice for the same results.
        sources = self.retriever.retrieve(question, top_k)

        # Format the context string identically to Retriever.retrieve_text.
        if sources:
            context = "\n\n---\n\n".join(
                f"[Source {i}: {s['metadata'].get('filename', 'Unknown')}]\n{s['content']}"
                for i, s in enumerate(sources, 1)
            )
        else:
            context = "No relevant documents found."

        # Generate answer using the LLM grounded in the retrieved context.
        answer = self.llm.chat_with_context(question, context)

        return {
            "question": question,
            "answer": answer,
            "context": context,
            "sources": [
                {
                    "filename": s["metadata"].get("filename", "Unknown"),
                    "source": s["metadata"].get("source", "Unknown"),
                    "distance": s.get("distance")
                }
                for s in sources
            ]
        }

    def chat(self, question: str) -> str:
        """Simple chat interface that returns just the answer.

        Args:
            question: User's question.

        Returns:
            Answer string.
        """
        result = self.query(question)
        return result["answer"]

    def get_stats(self) -> Dict[str, Any]:
        """Get pipeline statistics.

        Returns:
            Dict with stats about the pipeline.
        """
        return {
            "documents_count": self.vector_store.get_count(),
            "collection_name": self.config.vectorstore.collection_name,
            "model": self.config.model.repo_id,
            "embedding_model": self.config.embedding.model_name
        }
src/rag/retriever.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Document retriever for RAG pipeline."""
2
+
3
+ from typing import List, Dict, Any, Optional
4
+
5
+ from src.vectorstore.chroma_store import VectorStore
6
+
7
+
8
class Retriever:
    """Fetch the document chunks most relevant to a user query."""

    def __init__(self, vector_store: "VectorStore", top_k: int = 3):
        """Initialize the retriever.

        Args:
            vector_store: Vector store to search.
            top_k: Number of documents to retrieve.
        """
        self.vector_store = vector_store
        self.top_k = top_k

    def retrieve(self, query: str, top_k: Optional[int] = None) -> List[Dict[str, Any]]:
        """Retrieve relevant documents for a query.

        Args:
            query: User query.
            top_k: Override default number of results.

        Returns:
            List of relevant documents with metadata.
        """
        # Falsy top_k (None/0) falls back to the instance default.
        limit = top_k or self.top_k
        return self.vector_store.search(query, top_k=limit)

    def retrieve_text(self, query: str, top_k: Optional[int] = None) -> str:
        """Retrieve and format documents as a single context string.

        Args:
            query: User query.
            top_k: Override default number of results.

        Returns:
            Formatted context string.
        """
        hits = self.retrieve(query, top_k)

        if not hits:
            return "No relevant documents found."

        sections = [
            f"[Source {idx}: {hit['metadata'].get('filename', 'Unknown')}]\n{hit['content']}"
            for idx, hit in enumerate(hits, start=1)
        ]
        return "\n\n---\n\n".join(sections)
src/vectorstore/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ """Vector store module for FreeRAG."""
2
+
3
+ from src.vectorstore.chroma_store import VectorStore
4
+
5
+ __all__ = ["VectorStore"]
src/vectorstore/chroma_store.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ChromaDB vector store implementation."""
2
+
3
+ from typing import List, Optional, Dict, Any
4
+ from pathlib import Path
5
+ import chromadb
6
+ from chromadb.config import Settings
7
+
8
+ from src.config import VectorStoreConfig
9
+ from src.embeddings.sentence_embeddings import EmbeddingModel
10
+ from src.document_loader.splitter import TextChunk
11
+
12
+
13
class VectorStore:
    """ChromaDB-based vector store for document storage and retrieval."""

    def __init__(
        self,
        config: Optional["VectorStoreConfig"] = None,
        embedding_model: Optional["EmbeddingModel"] = None
    ):
        """Initialize the vector store.

        Args:
            config: Vector store configuration.
            embedding_model: Embedding model for generating vectors.
        """
        self.config = config or VectorStoreConfig()
        self.embedding_model = embedding_model or EmbeddingModel()
        # Client and collection are created lazily on first use.
        self._client: Optional["chromadb.Client"] = None
        self._collection = None

    @property
    def client(self) -> "chromadb.Client":
        """Get or create the persistent ChromaDB client."""
        if self._client is None:
            persist_path = Path(self.config.persist_directory)
            persist_path.mkdir(parents=True, exist_ok=True)

            self._client = chromadb.PersistentClient(
                path=str(persist_path),
                settings=Settings(anonymized_telemetry=False)
            )
        return self._client

    @property
    def collection(self):
        """Get or create the collection (cosine distance space)."""
        if self._collection is None:
            self._collection = self.client.get_or_create_collection(
                name=self.config.collection_name,
                metadata={"hnsw:space": "cosine"}
            )
        return self._collection

    def add_chunks(self, chunks: List["TextChunk"]) -> int:
        """Add text chunks to the vector store.

        Args:
            chunks: List of text chunks to add.

        Returns:
            Number of chunks added.
        """
        # Local import keeps this stdlib dependency next to its single use.
        import uuid

        if not chunks:
            return 0

        # Prepare data for ChromaDB
        documents = [chunk.content for chunk in chunks]
        metadatas = [chunk.metadata for chunk in chunks]

        # Use random UUIDs instead of count-based IDs ("doc_{count+i}"):
        # count-derived IDs collide with existing entries once any document
        # has been deleted (or a prior run left overlapping IDs), and Chroma
        # then rejects or deduplicates the add, silently losing chunks.
        ids = [f"doc_{uuid.uuid4().hex}" for _ in chunks]

        # Generate embeddings
        print(f"Generating embeddings for {len(chunks)} chunks...")
        embeddings = self.embedding_model.embed_documents(documents)

        # Add to collection
        self.collection.add(
            ids=ids,
            documents=documents,
            metadatas=metadatas,
            embeddings=embeddings
        )

        print(f"Added {len(chunks)} chunks to vector store.")
        return len(chunks)

    def search(
        self,
        query: str,
        top_k: Optional[int] = None
    ) -> List[Dict[str, Any]]:
        """Search for similar documents.

        Args:
            query: Search query.
            top_k: Number of results to return. Falls back to the configured
                default when omitted (or falsy).

        Returns:
            List of results with document content, metadata, and distance.
        """
        top_k = top_k or self.config.top_k

        # Generate query embedding
        query_embedding = self.embedding_model.embed_text(query)

        # Search
        results = self.collection.query(
            query_embeddings=[query_embedding],
            n_results=top_k,
            include=["documents", "metadatas", "distances"]
        )

        # Chroma returns one nested list per query; we issue a single query,
        # so index [0] everywhere below.
        formatted = []
        if results["documents"]:
            for i, doc in enumerate(results["documents"][0]):
                formatted.append({
                    "content": doc,
                    "metadata": results["metadatas"][0][i] if results["metadatas"] else {},
                    "distance": results["distances"][0][i] if results["distances"] else None
                })

        return formatted

    def get_count(self) -> int:
        """Get the number of documents in the store."""
        return self.collection.count()

    def clear(self) -> None:
        """Clear all documents by dropping and forgetting the collection."""
        self.client.delete_collection(self.config.collection_name)
        # Drop the cached handle so the next access recreates an empty one.
        self._collection = None
        print("Vector store cleared.")
uv.lock ADDED
The diff for this file is too large to render. See raw diff