Spaces:
Sleeping
Sleeping
Commit ·
6d2a17c
1
Parent(s): 1791aa5
refactor utils
Browse files- app.py +13 -13
- memo/context.py +1 -1
- memo/core.py +1 -1
- memo/history.py +1 -1
- memo/nvidia.py +1 -1
- memo/persistent.py +1 -1
- utils/README.md +2 -2
- utils/{rotator.py → api/rotator.py} +0 -0
- utils/{router.py → api/router.py} +0 -0
- utils/{caption.py → ingestion/caption.py} +1 -1
- utils/{chunker.py → ingestion/chunker.py} +3 -3
- utils/{parser.py → ingestion/parser.py} +1 -1
- utils/{embeddings.py → rag/embeddings.py} +0 -0
- utils/{rag.py → rag/rag.py} +0 -0
- utils/{common.py → service/common.py} +1 -1
- utils/{pdf.py → service/pdf.py} +0 -0
- utils/{summarizer.py → service/summarizer.py} +2 -2
app.py
CHANGED
|
@@ -17,15 +17,15 @@ from fastapi.middleware.cors import CORSMiddleware
|
|
| 17 |
# MongoDB imports
|
| 18 |
from pymongo.errors import PyMongoError, ConnectionFailure, ServerSelectionTimeoutError
|
| 19 |
|
| 20 |
-
from utils.rotator import APIKeyRotator
|
| 21 |
-
from utils.parser import parse_pdf_bytes, parse_docx_bytes
|
| 22 |
from utils.caption import BlipCaptioner
|
| 23 |
-
from utils.chunker import build_cards_from_pages
|
| 24 |
-
from utils.embeddings import EmbeddingClient
|
| 25 |
-
from utils.rag import RAGStore, ensure_indexes
|
| 26 |
-
from utils.router import select_model, generate_answer_with_model
|
| 27 |
-
from utils.summarizer import cheap_summarize
|
| 28 |
-
from utils.common import trim_text
|
| 29 |
from utils.logger import get_logger
|
| 30 |
import re
|
| 31 |
|
|
@@ -645,7 +645,7 @@ async def generate_report(
|
|
| 645 |
file_summary = doc_sum.get("summary", "")
|
| 646 |
|
| 647 |
# Chain-of-thought style two-step with Gemini
|
| 648 |
-
from utils.router import GEMINI_MED, GEMINI_PRO
|
| 649 |
|
| 650 |
# Step 1: Content filtering and relevance assessment based on user instructions
|
| 651 |
if instructions.strip():
|
|
@@ -734,7 +734,7 @@ async def generate_report_pdf(
|
|
| 734 |
"""
|
| 735 |
Generate a PDF from report content using the PDF utility module
|
| 736 |
"""
|
| 737 |
-
from utils.pdf import generate_report_pdf as generate_pdf
|
| 738 |
from fastapi.responses import Response
|
| 739 |
|
| 740 |
try:
|
|
@@ -775,7 +775,7 @@ Return only the variations, one per line, no numbering or extra text."""
|
|
| 775 |
|
| 776 |
user_prompt = f"Original question: {question}\n\nGenerate query variations:"
|
| 777 |
|
| 778 |
-
from utils.router import generate_answer_with_model
|
| 779 |
selection = {"provider": "nvidia", "model": "meta/llama-3.1-8b-instruct"}
|
| 780 |
response = await generate_answer_with_model(selection, sys_prompt, user_prompt, None, nvidia_rotator)
|
| 781 |
|
|
@@ -866,7 +866,7 @@ async def _chat_impl(
|
|
| 866 |
"""
|
| 867 |
import sys
|
| 868 |
from memo.core import get_memory_system
|
| 869 |
-
from utils.router import NVIDIA_SMALL # reuse default name
|
| 870 |
memory = get_memory_system()
|
| 871 |
logger.info("[CHAT] User Q/chat: %s", trim_text(question, 15).replace("\n", " "))
|
| 872 |
|
|
@@ -956,7 +956,7 @@ async def _chat_impl(
|
|
| 956 |
numbered = [{"id": i+1, "text": s} for i, s in enumerate(recent3)]
|
| 957 |
user = f"Question: {question}\nCandidates:\n{json.dumps(numbered, ensure_ascii=False)}\nSelect any related items and output ONLY their 'text' values concatenated."
|
| 958 |
try:
|
| 959 |
-
from utils.rotator import robust_post_json
|
| 960 |
key = nvidia_rotator.get_key()
|
| 961 |
url = "https://integrate.api.nvidia.com/v1/chat/completions"
|
| 962 |
payload = {
|
|
|
|
| 17 |
# MongoDB imports
|
| 18 |
from pymongo.errors import PyMongoError, ConnectionFailure, ServerSelectionTimeoutError
|
| 19 |
|
| 20 |
+
from utils.api.rotator import APIKeyRotator
|
| 21 |
+
from utils.ingestion.parser import parse_pdf_bytes, parse_docx_bytes
|
| 22 |
+
from utils.ingestion.caption import BlipCaptioner
|
| 23 |
+
from utils.ingestion.chunker import build_cards_from_pages
|
| 24 |
+
from utils.rag.embeddings import EmbeddingClient
|
| 25 |
+
from utils.rag.rag import RAGStore, ensure_indexes
|
| 26 |
+
from utils.api.router import select_model, generate_answer_with_model
|
| 27 |
+
from utils.service.summarizer import cheap_summarize
|
| 28 |
+
from utils.service.common import trim_text
|
| 29 |
from utils.logger import get_logger
|
| 30 |
import re
|
| 31 |
|
|
|
|
| 645 |
file_summary = doc_sum.get("summary", "")
|
| 646 |
|
| 647 |
# Chain-of-thought style two-step with Gemini
|
| 648 |
+
from utils.api.router import GEMINI_MED, GEMINI_PRO
|
| 649 |
|
| 650 |
# Step 1: Content filtering and relevance assessment based on user instructions
|
| 651 |
if instructions.strip():
|
|
|
|
| 734 |
"""
|
| 735 |
Generate a PDF from report content using the PDF utility module
|
| 736 |
"""
|
| 737 |
+
from utils.service.pdf import generate_report_pdf as generate_pdf
|
| 738 |
from fastapi.responses import Response
|
| 739 |
|
| 740 |
try:
|
|
|
|
| 775 |
|
| 776 |
user_prompt = f"Original question: {question}\n\nGenerate query variations:"
|
| 777 |
|
| 778 |
+
from utils.api.router import generate_answer_with_model
|
| 779 |
selection = {"provider": "nvidia", "model": "meta/llama-3.1-8b-instruct"}
|
| 780 |
response = await generate_answer_with_model(selection, sys_prompt, user_prompt, None, nvidia_rotator)
|
| 781 |
|
|
|
|
| 866 |
"""
|
| 867 |
import sys
|
| 868 |
from memo.core import get_memory_system
|
| 869 |
+
from utils.api.router import NVIDIA_SMALL # reuse default name
|
| 870 |
memory = get_memory_system()
|
| 871 |
logger.info("[CHAT] User Q/chat: %s", trim_text(question, 15).replace("\n", " "))
|
| 872 |
|
|
|
|
| 956 |
numbered = [{"id": i+1, "text": s} for i, s in enumerate(recent3)]
|
| 957 |
user = f"Question: {question}\nCandidates:\n{json.dumps(numbered, ensure_ascii=False)}\nSelect any related items and output ONLY their 'text' values concatenated."
|
| 958 |
try:
|
| 959 |
+
from utils.api.rotator import robust_post_json
|
| 960 |
key = nvidia_rotator.get_key()
|
| 961 |
url = "https://integrate.api.nvidia.com/v1/chat/completions"
|
| 962 |
payload = {
|
memo/context.py
CHANGED
|
@@ -9,7 +9,7 @@ import numpy as np
|
|
| 9 |
from typing import List, Dict, Any, Tuple, Optional
|
| 10 |
|
| 11 |
from utils.logger import get_logger
|
| 12 |
-
from utils.embeddings import EmbeddingClient
|
| 13 |
|
| 14 |
logger = get_logger("CONTEXT_MANAGER", __name__)
|
| 15 |
|
|
|
|
| 9 |
from typing import List, Dict, Any, Tuple, Optional
|
| 10 |
|
| 11 |
from utils.logger import get_logger
|
| 12 |
+
from utils.rag.embeddings import EmbeddingClient
|
| 13 |
|
| 14 |
logger = get_logger("CONTEXT_MANAGER", __name__)
|
| 15 |
|
memo/core.py
CHANGED
|
@@ -10,7 +10,7 @@ import asyncio
|
|
| 10 |
from typing import List, Dict, Any, Optional, Tuple
|
| 11 |
|
| 12 |
from utils.logger import get_logger
|
| 13 |
-
from utils.embeddings import EmbeddingClient
|
| 14 |
from memo.legacy import MemoryLRU
|
| 15 |
from memo.persistent import PersistentMemory
|
| 16 |
|
|
|
|
| 10 |
from typing import List, Dict, Any, Optional, Tuple
|
| 11 |
|
| 12 |
from utils.logger import get_logger
|
| 13 |
+
from utils.rag.embeddings import EmbeddingClient
|
| 14 |
from memo.legacy import MemoryLRU
|
| 15 |
from memo.persistent import PersistentMemory
|
| 16 |
|
memo/history.py
CHANGED
|
@@ -10,7 +10,7 @@ from typing import List, Dict, Any, Tuple, Optional
|
|
| 10 |
from utils.logger import get_logger
|
| 11 |
from memo.nvidia import summarize_qa, files_relevance, related_recent_context
|
| 12 |
from memo.context import semantic_context, get_legacy_context
|
| 13 |
-
from utils.embeddings import EmbeddingClient
|
| 14 |
|
| 15 |
logger = get_logger("HISTORY_MANAGER", __name__)
|
| 16 |
|
|
|
|
| 10 |
from utils.logger import get_logger
|
| 11 |
from memo.nvidia import summarize_qa, files_relevance, related_recent_context
|
| 12 |
from memo.context import semantic_context, get_legacy_context
|
| 13 |
+
from utils.rag.embeddings import EmbeddingClient
|
| 14 |
|
| 15 |
logger = get_logger("HISTORY_MANAGER", __name__)
|
| 16 |
|
memo/nvidia.py
CHANGED
|
@@ -10,7 +10,7 @@ import json
|
|
| 10 |
from typing import List, Dict, Any
|
| 11 |
|
| 12 |
from utils.logger import get_logger
|
| 13 |
-
from utils.rotator import robust_post_json
|
| 14 |
|
| 15 |
logger = get_logger("NVIDIA_INTEGRATION", __name__)
|
| 16 |
|
|
|
|
| 10 |
from typing import List, Dict, Any
|
| 11 |
|
| 12 |
from utils.logger import get_logger
|
| 13 |
+
from utils.api.rotator import robust_post_json
|
| 14 |
|
| 15 |
logger = get_logger("NVIDIA_INTEGRATION", __name__)
|
| 16 |
|
memo/persistent.py
CHANGED
|
@@ -11,7 +11,7 @@ from typing import List, Dict, Any, Optional, Tuple
|
|
| 11 |
from datetime import datetime, timezone
|
| 12 |
|
| 13 |
from utils.logger import get_logger
|
| 14 |
-
from utils.embeddings import EmbeddingClient
|
| 15 |
|
| 16 |
logger = get_logger("PERSISTENT_MEMORY", __name__)
|
| 17 |
|
|
|
|
| 11 |
from datetime import datetime, timezone
|
| 12 |
|
| 13 |
from utils.logger import get_logger
|
| 14 |
+
from utils.rag.embeddings import EmbeddingClient
|
| 15 |
|
| 16 |
logger = get_logger("PERSISTENT_MEMORY", __name__)
|
| 17 |
|
utils/README.md
CHANGED
|
@@ -102,8 +102,8 @@ Core utilities for the EdSummariser RAG system providing document processing, re
|
|
| 102 |
|
| 103 |
```python
|
| 104 |
# Basic RAG usage
|
| 105 |
-
from utils.rag import RAGStore
|
| 106 |
-
from utils.embeddings import EmbeddingClient
|
| 107 |
|
| 108 |
rag = RAGStore(mongo_uri, db_name)
|
| 109 |
embedder = EmbeddingClient()
|
|
|
|
| 102 |
|
| 103 |
```python
|
| 104 |
# Basic RAG usage
|
| 105 |
+
from utils.rag.rag import RAGStore
|
| 106 |
+
from utils.rag.embeddings import EmbeddingClient
|
| 107 |
|
| 108 |
rag = RAGStore(mongo_uri, db_name)
|
| 109 |
embedder = EmbeddingClient()
|
utils/{rotator.py → api/rotator.py}
RENAMED
|
File without changes
|
utils/{router.py → api/router.py}
RENAMED
|
File without changes
|
utils/{caption.py → ingestion/caption.py}
RENAMED
|
@@ -2,7 +2,7 @@
|
|
| 2 |
from typing import Optional
|
| 3 |
from PIL import Image
|
| 4 |
import logging
|
| 5 |
-
from .logger import get_logger
|
| 6 |
|
| 7 |
# Use transformers BLIP base (CPU friendly)
|
| 8 |
try:
|
|
|
|
| 2 |
from typing import Optional
|
| 3 |
from PIL import Image
|
| 4 |
import logging
|
| 5 |
+
from utils.logger import get_logger
|
| 6 |
|
| 7 |
# Use transformers BLIP base (CPU friendly)
|
| 8 |
try:
|
utils/{chunker.py → ingestion/chunker.py}
RENAMED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
# ────────────────────────────── utils/chunker.py ──────────────────────────────
|
| 2 |
import re
|
| 3 |
from typing import List, Dict, Any
|
| 4 |
-
from .summarizer import cheap_summarize, clean_chunk_text
|
| 5 |
-
from .common import split_sentences, slugify
|
| 6 |
-
from .logger import get_logger
|
| 7 |
|
| 8 |
# Enhanced semantic chunker with overlap and better structure:
|
| 9 |
# - Split by headings / numbered sections if present
|
|
|
|
| 1 |
+
# ────────────────────────────── utils/ingestion/chunker.py ──────────────────────────────
|
| 2 |
import re
|
| 3 |
from typing import List, Dict, Any
|
| 4 |
+
from utils.service.summarizer import cheap_summarize, clean_chunk_text
|
| 5 |
+
from utils.service.common import split_sentences, slugify
|
| 6 |
+
from utils.logger import get_logger
|
| 7 |
|
| 8 |
# Enhanced semantic chunker with overlap and better structure:
|
| 9 |
# - Split by headings / numbered sections if present
|
utils/{parser.py → ingestion/parser.py}
RENAMED
|
@@ -4,7 +4,7 @@ import fitz # PyMuPDF
|
|
| 4 |
from docx import Document
|
| 5 |
from PIL import Image
|
| 6 |
import numpy as np
|
| 7 |
-
from .logger import get_logger
|
| 8 |
|
| 9 |
logger = get_logger("PARSER", __name__)
|
| 10 |
|
|
|
|
| 4 |
from docx import Document
|
| 5 |
from PIL import Image
|
| 6 |
import numpy as np
|
| 7 |
+
from utils.logger import get_logger
|
| 8 |
|
| 9 |
logger = get_logger("PARSER", __name__)
|
| 10 |
|
utils/{embeddings.py → rag/embeddings.py}
RENAMED
|
File without changes
|
utils/{rag.py → rag/rag.py}
RENAMED
|
File without changes
|
utils/{common.py → service/common.py}
RENAMED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
import re
|
| 2 |
import unicodedata
|
| 3 |
-
from .logger import get_logger
|
| 4 |
|
| 5 |
logger = get_logger("COMMON", __name__)
|
| 6 |
|
|
|
|
| 1 |
import re
|
| 2 |
import unicodedata
|
| 3 |
+
from utils.logger import get_logger
|
| 4 |
|
| 5 |
logger = get_logger("COMMON", __name__)
|
| 6 |
|
utils/{pdf.py → service/pdf.py}
RENAMED
|
File without changes
|
utils/{summarizer.py → service/summarizer.py}
RENAMED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
import os
|
| 2 |
import asyncio
|
| 3 |
from typing import List
|
| 4 |
-
from .logger import get_logger
|
| 5 |
-
from utils.rotator import robust_post_json, APIKeyRotator
|
| 6 |
|
| 7 |
logger = get_logger("SUM", __name__)
|
| 8 |
|
|
|
|
| 1 |
import os
|
| 2 |
import asyncio
|
| 3 |
from typing import List
|
| 4 |
+
from utils.logger import get_logger
|
| 5 |
+
from utils.api.rotator import robust_post_json, APIKeyRotator
|
| 6 |
|
| 7 |
logger = get_logger("SUM", __name__)
|
| 8 |
|