LiamKhoaLe committed on
Commit
6d2a17c
·
1 Parent(s): 1791aa5

refactor utils

Browse files
app.py CHANGED
@@ -17,15 +17,15 @@ from fastapi.middleware.cors import CORSMiddleware
17
  # MongoDB imports
18
  from pymongo.errors import PyMongoError, ConnectionFailure, ServerSelectionTimeoutError
19
 
20
- from utils.rotator import APIKeyRotator
21
- from utils.parser import parse_pdf_bytes, parse_docx_bytes
22
  from utils.caption import BlipCaptioner
23
- from utils.chunker import build_cards_from_pages
24
- from utils.embeddings import EmbeddingClient
25
- from utils.rag import RAGStore, ensure_indexes
26
- from utils.router import select_model, generate_answer_with_model
27
- from utils.summarizer import cheap_summarize
28
- from utils.common import trim_text
29
  from utils.logger import get_logger
30
  import re
31
 
@@ -645,7 +645,7 @@ async def generate_report(
645
  file_summary = doc_sum.get("summary", "")
646
 
647
  # Chain-of-thought style two-step with Gemini
648
- from utils.router import GEMINI_MED, GEMINI_PRO
649
 
650
  # Step 1: Content filtering and relevance assessment based on user instructions
651
  if instructions.strip():
@@ -734,7 +734,7 @@ async def generate_report_pdf(
734
  """
735
  Generate a PDF from report content using the PDF utility module
736
  """
737
- from utils.pdf import generate_report_pdf as generate_pdf
738
  from fastapi.responses import Response
739
 
740
  try:
@@ -775,7 +775,7 @@ Return only the variations, one per line, no numbering or extra text."""
775
 
776
  user_prompt = f"Original question: {question}\n\nGenerate query variations:"
777
 
778
- from utils.router import generate_answer_with_model
779
  selection = {"provider": "nvidia", "model": "meta/llama-3.1-8b-instruct"}
780
  response = await generate_answer_with_model(selection, sys_prompt, user_prompt, None, nvidia_rotator)
781
 
@@ -866,7 +866,7 @@ async def _chat_impl(
866
  """
867
  import sys
868
  from memo.core import get_memory_system
869
- from utils.router import NVIDIA_SMALL # reuse default name
870
  memory = get_memory_system()
871
  logger.info("[CHAT] User Q/chat: %s", trim_text(question, 15).replace("\n", " "))
872
 
@@ -956,7 +956,7 @@ async def _chat_impl(
956
  numbered = [{"id": i+1, "text": s} for i, s in enumerate(recent3)]
957
  user = f"Question: {question}\nCandidates:\n{json.dumps(numbered, ensure_ascii=False)}\nSelect any related items and output ONLY their 'text' values concatenated."
958
  try:
959
- from utils.rotator import robust_post_json
960
  key = nvidia_rotator.get_key()
961
  url = "https://integrate.api.nvidia.com/v1/chat/completions"
962
  payload = {
 
17
  # MongoDB imports
18
  from pymongo.errors import PyMongoError, ConnectionFailure, ServerSelectionTimeoutError
19
 
20
+ from utils.api.rotator import APIKeyRotator
21
+ from utils.ingestion.parser import parse_pdf_bytes, parse_docx_bytes
22
  from utils.caption import BlipCaptioner
23
+ from utils.ingestion.chunker import build_cards_from_pages
24
+ from utils.rag.embeddings import EmbeddingClient
25
+ from utils.rag.rag import RAGStore, ensure_indexes
26
+ from utils.api.router import select_model, generate_answer_with_model
27
+ from utils.service.summarizer import cheap_summarize
28
+ from utils.service.common import trim_text
29
  from utils.logger import get_logger
30
  import re
31
 
 
645
  file_summary = doc_sum.get("summary", "")
646
 
647
  # Chain-of-thought style two-step with Gemini
648
+ from utils.api.router import GEMINI_MED, GEMINI_PRO
649
 
650
  # Step 1: Content filtering and relevance assessment based on user instructions
651
  if instructions.strip():
 
734
  """
735
  Generate a PDF from report content using the PDF utility module
736
  """
737
+ from utils.service.pdf import generate_report_pdf as generate_pdf
738
  from fastapi.responses import Response
739
 
740
  try:
 
775
 
776
  user_prompt = f"Original question: {question}\n\nGenerate query variations:"
777
 
778
+ from utils.api.router import generate_answer_with_model
779
  selection = {"provider": "nvidia", "model": "meta/llama-3.1-8b-instruct"}
780
  response = await generate_answer_with_model(selection, sys_prompt, user_prompt, None, nvidia_rotator)
781
 
 
866
  """
867
  import sys
868
  from memo.core import get_memory_system
869
+ from utils.api.router import NVIDIA_SMALL # reuse default name
870
  memory = get_memory_system()
871
  logger.info("[CHAT] User Q/chat: %s", trim_text(question, 15).replace("\n", " "))
872
 
 
956
  numbered = [{"id": i+1, "text": s} for i, s in enumerate(recent3)]
957
  user = f"Question: {question}\nCandidates:\n{json.dumps(numbered, ensure_ascii=False)}\nSelect any related items and output ONLY their 'text' values concatenated."
958
  try:
959
+ from utils.api.rotator import robust_post_json
960
  key = nvidia_rotator.get_key()
961
  url = "https://integrate.api.nvidia.com/v1/chat/completions"
962
  payload = {
memo/context.py CHANGED
@@ -9,7 +9,7 @@ import numpy as np
9
  from typing import List, Dict, Any, Tuple, Optional
10
 
11
  from utils.logger import get_logger
12
- from utils.embeddings import EmbeddingClient
13
 
14
  logger = get_logger("CONTEXT_MANAGER", __name__)
15
 
 
9
  from typing import List, Dict, Any, Tuple, Optional
10
 
11
  from utils.logger import get_logger
12
+ from utils.rag.embeddings import EmbeddingClient
13
 
14
  logger = get_logger("CONTEXT_MANAGER", __name__)
15
 
memo/core.py CHANGED
@@ -10,7 +10,7 @@ import asyncio
10
  from typing import List, Dict, Any, Optional, Tuple
11
 
12
  from utils.logger import get_logger
13
- from utils.embeddings import EmbeddingClient
14
  from memo.legacy import MemoryLRU
15
  from memo.persistent import PersistentMemory
16
 
 
10
  from typing import List, Dict, Any, Optional, Tuple
11
 
12
  from utils.logger import get_logger
13
+ from utils.rag.embeddings import EmbeddingClient
14
  from memo.legacy import MemoryLRU
15
  from memo.persistent import PersistentMemory
16
 
memo/history.py CHANGED
@@ -10,7 +10,7 @@ from typing import List, Dict, Any, Tuple, Optional
10
  from utils.logger import get_logger
11
  from memo.nvidia import summarize_qa, files_relevance, related_recent_context
12
  from memo.context import semantic_context, get_legacy_context
13
- from utils.embeddings import EmbeddingClient
14
 
15
  logger = get_logger("HISTORY_MANAGER", __name__)
16
 
 
10
  from utils.logger import get_logger
11
  from memo.nvidia import summarize_qa, files_relevance, related_recent_context
12
  from memo.context import semantic_context, get_legacy_context
13
+ from utils.rag.embeddings import EmbeddingClient
14
 
15
  logger = get_logger("HISTORY_MANAGER", __name__)
16
 
memo/nvidia.py CHANGED
@@ -10,7 +10,7 @@ import json
10
  from typing import List, Dict, Any
11
 
12
  from utils.logger import get_logger
13
- from utils.rotator import robust_post_json
14
 
15
  logger = get_logger("NVIDIA_INTEGRATION", __name__)
16
 
 
10
  from typing import List, Dict, Any
11
 
12
  from utils.logger import get_logger
13
+ from utils.api.rotator import robust_post_json
14
 
15
  logger = get_logger("NVIDIA_INTEGRATION", __name__)
16
 
memo/persistent.py CHANGED
@@ -11,7 +11,7 @@ from typing import List, Dict, Any, Optional, Tuple
11
  from datetime import datetime, timezone
12
 
13
  from utils.logger import get_logger
14
- from utils.embeddings import EmbeddingClient
15
 
16
  logger = get_logger("PERSISTENT_MEMORY", __name__)
17
 
 
11
  from datetime import datetime, timezone
12
 
13
  from utils.logger import get_logger
14
+ from utils.rag.embeddings import EmbeddingClient
15
 
16
  logger = get_logger("PERSISTENT_MEMORY", __name__)
17
 
utils/README.md CHANGED
@@ -102,8 +102,8 @@ Core utilities for the EdSummariser RAG system providing document processing, re
102
 
103
  ```python
104
  # Basic RAG usage
105
- from utils.rag import RAGStore
106
- from utils.embeddings import EmbeddingClient
107
 
108
  rag = RAGStore(mongo_uri, db_name)
109
  embedder = EmbeddingClient()
 
102
 
103
  ```python
104
  # Basic RAG usage
105
+ from utils.rag.rag import RAGStore
106
+ from utils.rag.embeddings import EmbeddingClient
107
 
108
  rag = RAGStore(mongo_uri, db_name)
109
  embedder = EmbeddingClient()
utils/{rotator.py → api/rotator.py} RENAMED
File without changes
utils/{router.py → api/router.py} RENAMED
File without changes
utils/{caption.py → ingestion/caption.py} RENAMED
@@ -2,7 +2,7 @@
2
  from typing import Optional
3
  from PIL import Image
4
  import logging
5
- from .logger import get_logger
6
 
7
  # Use transformers BLIP base (CPU friendly)
8
  try:
 
2
  from typing import Optional
3
  from PIL import Image
4
  import logging
5
+ from utils.logger import get_logger
6
 
7
  # Use transformers BLIP base (CPU friendly)
8
  try:
utils/{chunker.py → ingestion/chunker.py} RENAMED
@@ -1,9 +1,9 @@
1
  # ────────────────────────────── utils/chunker.py ──────────────────────────────
2
  import re
3
  from typing import List, Dict, Any
4
- from .summarizer import cheap_summarize, clean_chunk_text
5
- from .common import split_sentences, slugify
6
- from .logger import get_logger
7
 
8
  # Enhanced semantic chunker with overlap and better structure:
9
  # - Split by headings / numbered sections if present
 
1
  # ────────────────────────────── utils/chunker.py ──────────────────────────────
2
  import re
3
  from typing import List, Dict, Any
4
+ from utils.service.summarizer import cheap_summarize, clean_chunk_text
5
+ from utils.service.common import split_sentences, slugify
6
+ from utils.logger import get_logger
7
 
8
  # Enhanced semantic chunker with overlap and better structure:
9
  # - Split by headings / numbered sections if present
utils/{parser.py → ingestion/parser.py} RENAMED
@@ -4,7 +4,7 @@ import fitz # PyMuPDF
4
  from docx import Document
5
  from PIL import Image
6
  import numpy as np
7
- from .logger import get_logger
8
 
9
  logger = get_logger("PARSER", __name__)
10
 
 
4
  from docx import Document
5
  from PIL import Image
6
  import numpy as np
7
+ from utils.logger import get_logger
8
 
9
  logger = get_logger("PARSER", __name__)
10
 
utils/{embeddings.py → rag/embeddings.py} RENAMED
File without changes
utils/{rag.py → rag/rag.py} RENAMED
File without changes
utils/{common.py → service/common.py} RENAMED
@@ -1,6 +1,6 @@
1
  import re
2
  import unicodedata
3
- from .logger import get_logger
4
 
5
  logger = get_logger("COMMON", __name__)
6
 
 
1
  import re
2
  import unicodedata
3
+ from utils.logger import get_logger
4
 
5
  logger = get_logger("COMMON", __name__)
6
 
utils/{pdf.py → service/pdf.py} RENAMED
File without changes
utils/{summarizer.py → service/summarizer.py} RENAMED
@@ -1,8 +1,8 @@
1
  import os
2
  import asyncio
3
  from typing import List
4
- from .logger import get_logger
5
- from utils.rotator import robust_post_json, APIKeyRotator
6
 
7
  logger = get_logger("SUM", __name__)
8
 
 
1
  import os
2
  import asyncio
3
  from typing import List
4
+ from utils.logger import get_logger
5
+ from utils.api.rotator import robust_post_json, APIKeyRotator
6
 
7
  logger = get_logger("SUM", __name__)
8