Delete build_hf.py
build_hf.py  +0 −200  (DELETED)
@@ -1,200 +0,0 @@
-"""
-Build script optimized for Hugging Face Spaces deployment
-Maintains the exact same SOTA RAG architecture
-"""
-import os
-import sys
-import logging
-import pickle
-import json
-import numpy as np
-import torch
-from pathlib import Path
-
-# Add parent directory to path
-sys.path.append('.')
-
-from app import (
-    load_opc_datasets,
-    build_retrieval_system,
-    ARTIFACT_DIR,
-    FAISS_AVAILABLE,
-    MODEL_NAME,
-    EMBED_MODEL,
-    MAX_CORPUS_SIZE
-)
-
-# Configure logging
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(asctime)s - %(levelname)s - %(message)s',
-    handlers=[
-        logging.StreamHandler(sys.stdout),
-        logging.FileHandler('/data/build.log')
-    ]
-)
-logger = logging.getLogger(__name__)
-
-def check_artifacts():
-    """Check if artifacts already exist"""
-    required_files = [
-        "corpus_data.json",
-        "corpus_embeddings.npy",
-        "answer_embeddings.npy",
-        "bm25.pkl"
-    ]
-
-    if FAISS_AVAILABLE:
-        required_files.append("faiss_index.bin")
-
-    all_exist = all(os.path.exists(os.path.join(ARTIFACT_DIR, f)) for f in required_files)
-    return all_exist
-
-def build_retrieval_with_progress():
-    """Build retrieval system with progress tracking"""
-    logger.info("Building SOTA RAG Retrieval System for Coding Assistant")
-    logger.info(f"Architecture: HyDE + Query Rewriting + Multi-Query + Answer-Space Retrieval")
-    logger.info(f"Embedding Model: {EMBED_MODEL}")
-    logger.info(f"Max Corpus Size: {MAX_CORPUS_SIZE}")
-
-    # Load datasets
-    logger.info("Loading coding datasets...")
-    ds_map = load_opc_datasets()
-
-    # Build retrieval system (using the exact same function from app.py)
-    logger.info("Building retrieval system...")
-    retrieval_system = build_retrieval_system(ds_map)
-
-    logger.info("Retrieval system built successfully!")
-    logger.info(f" - Corpus size: {len(retrieval_system.corpus_texts)}")
-    logger.info(f" - Embedding dimension: {retrieval_system.corpus_embeddings.shape[1]}")
-    logger.info(f" - FAISS index: {'Yes' if retrieval_system.faiss_index else 'No'}")
-
-    return retrieval_system
-
-def prepare_llm_artifacts():
-    """Prepare LLM artifacts without downloading the full model"""
-    logger.info("🤖 Preparing LLM configuration...")
-
-    from transformers import AutoTokenizer, GenerationConfig
-
-    llm_path = os.path.join(ARTIFACT_DIR, "llm_model")
-    os.makedirs(llm_path, exist_ok=True)
-
-    # Download and save tokenizer
-    logger.info(f"📥 Downloading tokenizer for {MODEL_NAME}...")
-    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-
-    if tokenizer.pad_token is None:
-        tokenizer.pad_token = tokenizer.eos_token
-
-    # Use the exact same chat template from app.py
-    tokenizer.chat_template = (
-        "{% for message in messages %}"
-        "{{'<|'+message['role']+'|>\\n'+message['content']+'</s>\\n'}}"
-        "{% endfor %}"
-        "{% if add_generation_prompt %}"
-        "<|assistant|>\n"
-        "{% endif %}"
-    )
-
-    # Use the exact same generation config from app.py
-    generation_config = GenerationConfig(
-        max_new_tokens=300,
-        temperature=0.7,
-        top_p=0.9,
-        do_sample=True,
-        repetition_penalty=1.15,
-        pad_token_id=tokenizer.pad_token_id
-    )
-
-    # Save tokenizer and config
-    tokenizer.save_pretrained(llm_path)
-    generation_config.save_pretrained(llm_path)
-
-    # Create minimal config file
-    config = {
-        "_name_or_path": MODEL_NAME,
-        "architectures": ["LlamaForCausalLM"],
-        "model_type": "llama",
-        "torch_dtype": "float16",
-        "quantization_config": {
-            "load_in_4bit": True,
-            "bnb_4bit_compute_dtype": "float32",
-            "bnb_4bit_use_double_quant": True,
-            "bnb_4bit_quant_type": "nf4"
-        } if torch.cuda.is_available() else {}
-    }
-
-    config_path = os.path.join(llm_path, "config.json")
-    with open(config_path, "w") as f:
-        json.dump(config, f, indent=2)
-
-    logger.info(f"LLM configuration saved to {llm_path}")
-    logger.info("Note: Full model will be downloaded at runtime with 4-bit quantization")
-
-def verify_artifacts():
-    """Verify all artifacts are properly built"""
-    logger.info("🔍 Verifying artifacts...")
-
-    files_to_check = {
-        "corpus_data.json": "Corpus data",
-        "corpus_embeddings.npy": "Question embeddings",
-        "answer_embeddings.npy": "Answer embeddings",
-        "bm25.pkl": "BM25 index",
-        "faiss_index.bin": "FAISS index"
-    }
-
-    for file, description in files_to_check.items():
-        path = os.path.join(ARTIFACT_DIR, file)
-        if os.path.exists(path):
-            size_mb = os.path.getsize(path) / (1024 * 1024)
-            logger.info(f" ✓ {description}: {size_mb:.2f} MB")
-        else:
-            if file != "faiss_index.bin" or FAISS_AVAILABLE:
-                logger.warning(f" ✗ Missing: {description}")
-
-def main():
-    """Main build process"""
-    logger.info("=" * 60)
-    logger.info("🤖 Codey Bryant 3.0 - SOTA RAG Build Script")
-    logger.info("=" * 60)
-
-    # Create artifacts directory
-    os.makedirs(ARTIFACT_DIR, exist_ok=True)
-
-    # Check if we need to rebuild
-    if check_artifacts():
-        logger.info("Artifacts already exist. Skipping build.")
-        logger.info("Delete artifacts to force rebuild.")
-    else:
-        logger.info("Building fresh artifacts...")
-
-        # Build retrieval system
-        build_retrieval_with_progress()
-
-        # Prepare LLM artifacts
-        prepare_llm_artifacts()
-
-        logger.info("Build complete!")
-
-    # Verify artifacts
-    verify_artifacts()
-
-    # Show total size
-    logger.info("\nArtifact Summary:")
-    total_size = 0
-    for root, dirs, files in os.walk(ARTIFACT_DIR):
-        for file in files:
-            filepath = os.path.join(root, file)
-            size_mb = os.path.getsize(filepath) / (1024 * 1024)
-            total_size += size_mb
-
-    logger.info(f" Total size: {total_size:.2f} MB")
-    logger.info("=" * 60)
-    logger.info("Ready to launch Codey Bryant!")
-    logger.info(" Run: python app.py")
-    logger.info("=" * 60)
-
-if __name__ == "__main__":
-    main()
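
For context on how the artifacts written by the deleted script were meant to be consumed at startup, the following is a minimal sketch, not part of the deleted file or of this commit. The file names and the ARTIFACT_DIR layout come from build_hf.py above; the load_artifacts helper and the local faiss import are illustrative assumptions about the runtime side in app.py.

import json
import os
import pickle

import numpy as np


def load_artifacts(artifact_dir: str):
    """Hypothetical loader for the files produced by build_hf.py."""
    # Corpus texts/metadata saved as JSON by the build step
    with open(os.path.join(artifact_dir, "corpus_data.json")) as f:
        corpus_data = json.load(f)

    # Dense question-space and answer-space embeddings
    corpus_embeddings = np.load(os.path.join(artifact_dir, "corpus_embeddings.npy"))
    answer_embeddings = np.load(os.path.join(artifact_dir, "answer_embeddings.npy"))

    # Sparse BM25 index pickled by the build step
    with open(os.path.join(artifact_dir, "bm25.pkl"), "rb") as f:
        bm25 = pickle.load(f)

    # Optional FAISS index, only written when FAISS was available at build time
    faiss_index = None
    faiss_path = os.path.join(artifact_dir, "faiss_index.bin")
    if os.path.exists(faiss_path):
        import faiss  # assumed to be installed in the runtime environment
        faiss_index = faiss.read_index(faiss_path)

    return corpus_data, corpus_embeddings, answer_embeddings, bm25, faiss_index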
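
Likewise, prepare_llm_artifacts() only saves the tokenizer, generation config, and a minimal config.json recording 4-bit settings; per the script's own note, the full model is downloaded at runtime. The sketch below shows what such a 4-bit load could look like with transformers and bitsandbytes. The actual call in app.py is not part of this commit, so treat this as an assumption rather than the project's implementation.

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

from app import MODEL_NAME  # same constant the build script imports

# Mirrors the quantization_config dict from the deleted script; only applied on GPU.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float32,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_config if torch.cuda.is_available() else None,
    device_map="auto",
)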