Spaces:

galbendavids
/

CarsRUS

Sleeping

App Files Files Community

galbendavids commited on Jan 28

Commit

a98f7cb

verified ·

1 Parent(s): da458f9

optimize rag flow

Browse files

Files changed (5) hide show

__pycache__/rag_engine.cpython-313.pyc +0 -0
__pycache__/rag_engine.cpython-37.pyc +0 -0
app.py +8 -2
rag_engine.py +1 -1
test_rag.py +163 -0

__pycache__/rag_engine.cpython-313.pyc CHANGED Viewed

Binary files a/__pycache__/rag_engine.cpython-313.pyc and b/__pycache__/rag_engine.cpython-313.pyc differ

__pycache__/rag_engine.cpython-37.pyc CHANGED Viewed

Binary files a/__pycache__/rag_engine.cpython-37.pyc and b/__pycache__/rag_engine.cpython-37.pyc differ

app.py CHANGED Viewed

@@ -16,6 +16,7 @@ Implemented Features:
 import gradio as gr
 import os
 from rag_engine import RAGEngine
 # Initialize RAG Engine - Loads data and builds indices
@@ -26,6 +27,8 @@ try:
     print(f"✅ Engine ready with {len(engine.chunks)} smart chunks")
 except Exception as e:
     print(f"❌ Error initializing RAG Engine: {e}")
     engine = None
@@ -60,14 +63,17 @@ Get your key from: [Google AI Studio](https://aistudio.google.com/apikey)"""
 The RAG Engine failed to load. This usually means:
 - Data files are missing
 - Environment is misconfigured
-Please check the Space logs for details."""
     try:
         # Generate response using the RAG engine
         response = engine.generate_response(message, history, api_key)
         return response
     except Exception as e:
         return f"""❌ **Error Processing Query**

 import gradio as gr
 import os
+import traceback
 from rag_engine import RAGEngine
 # Initialize RAG Engine - Loads data and builds indices
     print(f"✅ Engine ready with {len(engine.chunks)} smart chunks")
 except Exception as e:
     print(f"❌ Error initializing RAG Engine: {e}")
+    print("Full traceback:")
+    traceback.print_exc()
     engine = None
 The RAG Engine failed to load. This usually means:
 - Data files are missing
 - Environment is misconfigured
+- Check the Space logs for specific error details
+Common solutions:
+1. Ensure data_ingestion/scraped_data.json exists
+2. Check that all dependencies are installed
+3. Verify the workspace path is correct"""
     try:
         # Generate response using the RAG engine
         response = engine.generate_response(message, history, api_key)
         return response
     except Exception as e:
         return f"""❌ **Error Processing Query**

rag_engine.py CHANGED Viewed

@@ -249,7 +249,7 @@ class RAGEngine:
                     self.keyword_index[keyword].append(len(self.chunks) - 1)
         print(f"Created {len(self.chunks)} smart chunks from {len(raw_data)} articles with rich metadata.")
-        self._build_index()
     def _extract_car_type(self, title: str) -> str:
         """זיהוי סוג הרכב"""

                     self.keyword_index[keyword].append(len(self.chunks) - 1)
         print(f"Created {len(self.chunks)} smart chunks from {len(raw_data)} articles with rich metadata.")
+        # Don't build index here - let it be lazy loaded on first search
     def _extract_car_type(self, title: str) -> str:
         """זיהוי סוג הרכב"""

test_rag.py ADDED Viewed

	@@ -0,0 +1,163 @@

+#!/usr/bin/env python
+"""
+Simple test file for RAG Engine
+Tests basic initialization and search functionality
+"""
+import sys
+import os
+# Add project to path
+sys.path.insert(0, os.path.dirname(__file__))
+def test_initialization():
+    """Test RAG engine initialization"""
+    print("\n" + "="*60)
+    print("TEST 1: RAG Engine Initialization")
+    print("="*60)
+    from rag_engine import RAGEngine
+    try:
+        engine = RAGEngine()
+        print(f"✅ Engine initialized successfully")
+        print(f"   - Chunks loaded: {len(engine.chunks)}")
+        print(f"   - Metadata entries: {len(engine.chunk_metadata)}")
+        print(f"   - Keyword index entries: {len(engine.keyword_index)}")
+        print(f"   - Embeddings: {engine.embeddings}")
+        return True, engine
+    except Exception as e:
+        print(f"❌ Initialization failed: {e}")
+        import traceback
+        traceback.print_exc()
+        return False, None
+def test_search(engine):
+    """Test hybrid search functionality"""
+    print("\n" + "="*60)
+    print("TEST 2: Hybrid Search")
+    print("="*60)
+    try:
+        query = "Tell me about the Audi RS3"
+        print(f"Testing search for: '{query}'")
+        results = engine._hybrid_search(query, top_k=3)
+        print(f"✅ Search successful")
+        print(f"   - Results found: {len(results)}")
+        if results:
+            print(f"   - Top result score: {results[0]['score']:.3f}")
+            print(f"   - Top result title: {results[0]['metadata']['title']}")
+        return True
+    except Exception as e:
+        print(f"❌ Search failed: {e}")
+        import traceback
+        traceback.print_exc()
+        return False
+def test_car_normalization(engine):
+    """Test car name normalization"""
+    print("\n" + "="*60)
+    print("TEST 3: Car Name Normalization")
+    print("="*60)
+    test_cases = [
+        ("Audi RS3", "audi_rs3"),
+        ("RS3", "audi_rs3"),
+        ("קיה EV9", "kia_ev9"),
+        ("Citroen C3", "citroen_c3"),
+    ]
+    passed = 0
+    failed = 0
+    for text, expected in test_cases:
+        result = engine._normalize_car_name(text)
+        if result == expected:
+            print(f"✅ '{text}' → {result}")
+            passed += 1
+        else:
+            print(f"❌ '{text}' → {result} (expected {expected})")
+            failed += 1
+    print(f"   - Passed: {passed}/{len(test_cases)}")
+    return failed == 0
+def test_embeddings(engine):
+    """Test that embeddings are lazy loaded"""
+    print("\n" + "="*60)
+    print("TEST 4: Lazy Embedding Loading")
+    print("="*60)
+    try:
+        # Check initial state
+        if engine.embeddings is None:
+            print("✅ Embeddings are None at startup (lazy loading working)")
+        else:
+            print("⚠️  Embeddings already loaded (not lazy)")
+        # Trigger embedding generation
+        query = "Test query"
+        engine._hybrid_search(query, top_k=1)
+        if engine.embeddings is not None:
+            print(f"✅ Embeddings generated after first search")
+            print(f"   - Shape: {engine.embeddings.shape}")
+            print(f"   - Expected chunks: {len(engine.chunks)}")
+            return True
+        else:
+            print(f"❌ Embeddings not generated")
+            return False
+    except Exception as e:
+        print(f"❌ Embedding test failed: {e}")
+        import traceback
+        traceback.print_exc()
+        return False
+def main():
+    """Run all tests"""
+    print("\n" + "="*60)
+    print("CARSRUS RAG ENGINE TEST SUITE")
+    print("="*60)
+    # Test 1: Initialization
+    success, engine = test_initialization()
+    if not success:
+        print("\n❌ TESTS FAILED - Initialization error")
+        return 1
+    # Test 2: Normalization
+    if not test_car_normalization(engine):
+        print("\n⚠️  Some normalization tests failed")
+    # Test 3: Search
+    if not test_search(engine):
+        print("\n❌ TESTS FAILED - Search error")
+        return 1
+    # Test 4: Embeddings
+    if not test_embeddings(engine):
+        print("\n⚠️  Embedding test had issues")
+    # Summary
+    print("\n" + "="*60)
+    print("✅ ALL CRITICAL TESTS PASSED")
+    print("="*60)
+    print("\nRAG Engine is ready for deployment!")
+    print("- Initialization: ✅")
+    print("- Data loading: ✅")
+    print("- Search functionality: ✅")
+    print("- Lazy loading: ✅")
+    return 0
+if __name__ == "__main__":
+    exit(main())