| # Digi-Biz Project Structure |
|
|
| digi-biz/ |
| ├── backend/ |
| │   ├── __init__.py |
| │   ├── main.py # FastAPI app entry point |
| │   ├── config/ |
| │   │   ├── __init__.py |
| │   │   └── settings.py # Pydantic settings loader |
| │   ├── agents/ |
| │   │   ├── __init__.py |
| │   │   ├── file_discovery.py # Agent 1: ZIP extraction & classification |
| │   │   ├── document_parsing.py # Agent 2: PDF/DOCX parsing |
| │   │   ├── table_extraction.py # Agent 3: Table detection |
| │   │   ├── media_extraction.py # Agent 4: Image/video extraction |
| │   │   ├── vision_agent.py # Agent 5: Qwen vision analysis |
| │   │   ├── indexing.py # Agent 6: Page index builder |
| │   │   ├── schema_mapping.py # Agent 7: Groq schema mapping |
| │   │   └── validation.py # Agent 8: Profile validation |
| │   ├── parsers/ |
| │   │   ├── __init__.py |
| │   │   ├── pdf_parser.py |
| │   │   ├── docx_parser.py |
| │   │   ├── excel_parser.py |
| │   │   └── parser_factory.py |
| │   ├── indexing/ |
| │   │   ├── __init__.py |
| │   │   ├── index_builder.py |
| │   │   ├── keyword_extractor.py |
| │   │   └── retriever.py |
| │   ├── validation/ |
| │   │   ├── __init__.py |
| │   │   ├── schema_validator.py |
| │   │   └── completeness.py |
| │   ├── models/ |
| │   │   ├── __init__.py |
| │   │   ├── schemas.py # Pydantic data models |
| │   │   └── enums.py # FileType, TableType, etc. |
| │   ├── utils/ |
| │   │   ├── __init__.py |
| │   │   ├── logger.py |
| │   │   ├── file_classifier.py |
| │   │   ├── storage_manager.py |
| │   │   ├── text_utils.py |
| │   │   ├── table_utils.py |
| │   │   ├── media_utils.py |
| │   │   ├── groq_client.py # Groq API wrapper |
| │   │   └── ollama_client.py # Ollama API wrapper |
| │   └── pipelines/ |
| │       ├── __init__.py |
| │       └── digitization_pipeline.py # Main orchestrator |
| ├── frontend/ |
| │   ├── src/ |
| │   │   ├── components/ |
| │   │   ├── pages/ |
| │   │   ├── hooks/ |
| │   │   └── types/ |
| │   └── package.json |
| ├── storage/ # Created at runtime |
| │   ├── uploads/ |
| │   ├── extracted/ |
| │   ├── profiles/ |
| │   ├── index/ |
| │   └── temp/ |
| ├── tests/ |
| │   ├── __init__.py |
| │   ├── conftest.py # Pytest fixtures |
| │   ├── agents/ |
| │   │   ├── test_file_discovery.py |
| │   │   ├── test_document_parsing.py |
| │   │   └── ... |
| │   ├── parsers/ |
| │   ├── utils/ |
| │   └── fixtures/ # Test data |
| │       ├── sample_business_1/ |
| │       └── sample_business_2/ |
| ├── docs/ |
| │   ├── API.md |
| │   └── USER_MANUAL.md |
| ├── .env.example |
| ├── .env # (gitignored - your actual config) |
| ├── .gitignore |
| ├── requirements.txt |
| ├── pytest.ini |
| ├── mypy.ini |
| └── README.md |
|
|