Upload 14 files
Browse files- RAG_SYSTEM_PLAN.md +2 -2
- app.py +196 -5
- rag/.env +2 -1
- rag/__pycache__/ingest.cpython-312.pyc +0 -0
- rag/__pycache__/utils.cpython-312.pyc +0 -0
- rag/utils.py +285 -27
- requirements.txt +3 -0
- test_statement.txt +10 -0
RAG_SYSTEM_PLAN.md
CHANGED
|
@@ -139,7 +139,7 @@ Request body:
|
|
| 139 |
{
|
| 140 |
"question": "What is the penalty for late tax filing?",
|
| 141 |
"top_k": 5,
|
| 142 |
-
"model": "gemini-2.
|
| 143 |
}
|
| 144 |
```
|
| 145 |
|
|
@@ -201,7 +201,7 @@ Key settings in `ingest.py`:
|
|
| 201 |
## Models Used
|
| 202 |
|
| 203 |
- **Embeddings**: `text-embedding-004` (768 dimensions)
|
| 204 |
-
- **Generation**: `gemini-2.
|
| 205 |
- Can also use `gemini-2.0-pro` for complex reasoning
|
| 206 |
|
| 207 |
## Security Considerations
|
|
|
|
| 139 |
{
|
| 140 |
"question": "What is the penalty for late tax filing?",
|
| 141 |
"top_k": 5,
|
| 142 |
+
"model": "gemini-2.5-flash"
|
| 143 |
}
|
| 144 |
```
|
| 145 |
|
|
|
|
| 201 |
## Models Used
|
| 202 |
|
| 203 |
- **Embeddings**: `text-embedding-004` (768 dimensions)
|
| 204 |
+
- **Generation**: `gemini-2.5-flash` (default, fast)
|
| 205 |
- Can also use `gemini-2.0-pro` for complex reasoning
|
| 206 |
|
| 207 |
## Security Considerations
|
app.py
CHANGED
|
@@ -7,7 +7,7 @@ from contextlib import asynccontextmanager
|
|
| 7 |
from collections import defaultdict
|
| 8 |
|
| 9 |
from fastapi import FastAPI, HTTPException, UploadFile, File, Request, Depends, Form
|
| 10 |
-
from typing import Optional
|
| 11 |
from fastapi.middleware.cors import CORSMiddleware
|
| 12 |
from fastapi.security import APIKeyHeader
|
| 13 |
from pydantic import BaseModel, Field
|
|
@@ -147,6 +147,18 @@ class HealthResponse(BaseModel):
|
|
| 147 |
vectors_indexed: int
|
| 148 |
|
| 149 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
@app.get("/", response_model=dict)
|
| 151 |
async def root():
|
| 152 |
return {
|
|
@@ -209,7 +221,8 @@ async def ask_question(
|
|
| 209 |
|
| 210 |
image_data = None
|
| 211 |
image_mime_type = None
|
| 212 |
-
|
|
|
|
| 213 |
if image and image.filename:
|
| 214 |
allowed_types = ["image/jpeg", "image/png", "image/gif", "image/webp"]
|
| 215 |
if image.content_type not in allowed_types:
|
|
@@ -222,7 +235,38 @@ async def ask_question(
|
|
| 222 |
|
| 223 |
image_data = await image.read()
|
| 224 |
image_mime_type = image.content_type
|
| 225 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 226 |
try:
|
| 227 |
query_embedding = generate_query_embedding(gemini_client, question)
|
| 228 |
except Exception as e:
|
|
@@ -265,7 +309,21 @@ async def ask_question(
|
|
| 265 |
})
|
| 266 |
|
| 267 |
context = "\n\n---\n\n".join(context_parts)
|
| 268 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 269 |
conversation_history = conversation_sessions.get(session_id, [])
|
| 270 |
|
| 271 |
try:
|
|
@@ -354,7 +412,7 @@ async def ingest_document(
|
|
| 354 |
async def get_stats(api_key: str = Depends(verify_api_key)):
|
| 355 |
if pinecone_index is None:
|
| 356 |
raise HTTPException(status_code=503, detail="Pinecone not initialized.")
|
| 357 |
-
|
| 358 |
try:
|
| 359 |
stats = pinecone_index.describe_index_stats()
|
| 360 |
return StatsResponse(
|
|
@@ -366,6 +424,139 @@ async def get_stats(api_key: str = Depends(verify_api_key)):
|
|
| 366 |
raise HTTPException(status_code=500, detail=f"Error getting stats: {str(e)}")
|
| 367 |
|
| 368 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 369 |
if __name__ == "__main__":
|
| 370 |
import uvicorn
|
| 371 |
uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=True)
|
|
|
|
| 7 |
from collections import defaultdict
|
| 8 |
|
| 9 |
from fastapi import FastAPI, HTTPException, UploadFile, File, Request, Depends, Form
|
| 10 |
+
from typing import Optional, List
|
| 11 |
from fastapi.middleware.cors import CORSMiddleware
|
| 12 |
from fastapi.security import APIKeyHeader
|
| 13 |
from pydantic import BaseModel, Field
|
|
|
|
| 147 |
vectors_indexed: int
|
| 148 |
|
| 149 |
|
| 150 |
+
class YearlyWrapRequest(BaseModel):
    # Request schema carrying the year a financial wrap should cover.
    # The value must lie in 2000..2030 (inclusive); 2024 is used when the
    # field is omitted.
    year: int = Field(
        default=2024,
        ge=2000,
        le=2030,
    )
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
class YearlyWrapResponse(BaseModel):
    # Response schema returned by the POST /yearly-wrap endpoint.

    # Financial analysis dictionary produced from the uploaded files.
    analysis: dict
    # Generated video script, or None when script generation did not run.
    # An explicit None default keeps the field optional under Pydantic v2,
    # where Optional[...] without a default is still a required field.
    video_script: Optional[dict] = None
    # URL of the rendered video, or None when no video was produced.
    video_url: Optional[str] = None
    # Outcome marker; the endpoint sets "completed", "script_only" or
    # "analysis_only".
    status: str
    # Human-readable description of the outcome.
    message: str
|
| 160 |
+
|
| 161 |
+
|
| 162 |
@app.get("/", response_model=dict)
|
| 163 |
async def root():
|
| 164 |
return {
|
|
|
|
| 221 |
|
| 222 |
image_data = None
|
| 223 |
image_mime_type = None
|
| 224 |
+
document_text = ""
|
| 225 |
+
|
| 226 |
if image and image.filename:
|
| 227 |
allowed_types = ["image/jpeg", "image/png", "image/gif", "image/webp"]
|
| 228 |
if image.content_type not in allowed_types:
|
|
|
|
| 235 |
|
| 236 |
image_data = await image.read()
|
| 237 |
image_mime_type = image.content_type
|
| 238 |
+
|
| 239 |
+
if document and document.filename:
|
| 240 |
+
allowed_exts = [".pdf", ".doc", ".docx", ".txt"]
|
| 241 |
+
if not any(document.filename.lower().endswith(ext) for ext in allowed_exts):
|
| 242 |
+
raise HTTPException(
|
| 243 |
+
status_code=400,
|
| 244 |
+
detail=f"Invalid document type. Allowed: {', '.join(allowed_exts)}"
|
| 245 |
+
)
|
| 246 |
+
if document.size and document.size > 50 * 1024 * 1024:
|
| 247 |
+
raise HTTPException(status_code=400, detail="Document too large. Max 50MB.")
|
| 248 |
+
|
| 249 |
+
doc_content = await document.read()
|
| 250 |
+
|
| 251 |
+
try:
|
| 252 |
+
from rag.ingest import extract_text_from_file
|
| 253 |
+
import tempfile
|
| 254 |
+
import os
|
| 255 |
+
|
| 256 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(document.filename)[1]) as tmp_file:
|
| 257 |
+
tmp_file.write(doc_content)
|
| 258 |
+
tmp_file_path = tmp_file.name
|
| 259 |
+
|
| 260 |
+
try:
|
| 261 |
+
document_text = extract_text_from_file(Path(tmp_file_path))
|
| 262 |
+
if document_text:
|
| 263 |
+
document_text = f"[User Uploaded Document: {document.filename}]\n{document_text}"
|
| 264 |
+
finally:
|
| 265 |
+
os.unlink(tmp_file_path)
|
| 266 |
+
|
| 267 |
+
except Exception as e:
|
| 268 |
+
raise HTTPException(status_code=500, detail=f"Error processing document: {str(e)}")
|
| 269 |
+
|
| 270 |
try:
|
| 271 |
query_embedding = generate_query_embedding(gemini_client, question)
|
| 272 |
except Exception as e:
|
|
|
|
| 309 |
})
|
| 310 |
|
| 311 |
context = "\n\n---\n\n".join(context_parts)
|
| 312 |
+
|
| 313 |
+
if document_text:
|
| 314 |
+
context = f"""[Tax Document Analysis - User Uploaded File]
|
| 315 |
+
{document_text}
|
| 316 |
+
|
| 317 |
+
--- Tax Law Reference Context ---
|
| 318 |
+
{context}
|
| 319 |
+
|
| 320 |
+
[TAX ANALYSIS INSTRUCTIONS]
|
| 321 |
+
- Analyze the uploaded document for tax-relevant information, forms, and declarations
|
| 322 |
+
- Identify tax amounts, deadlines, compliance requirements, and filing obligations
|
| 323 |
+
- Cross-reference with Nigerian tax laws from the retrieved context
|
| 324 |
+
- Provide specific guidance on tax declarations, calculations, and compliance
|
| 325 |
+
- Highlight any missing information or additional documents needed"""
|
| 326 |
+
|
| 327 |
conversation_history = conversation_sessions.get(session_id, [])
|
| 328 |
|
| 329 |
try:
|
|
|
|
| 412 |
async def get_stats(api_key: str = Depends(verify_api_key)):
|
| 413 |
if pinecone_index is None:
|
| 414 |
raise HTTPException(status_code=503, detail="Pinecone not initialized.")
|
| 415 |
+
|
| 416 |
try:
|
| 417 |
stats = pinecone_index.describe_index_stats()
|
| 418 |
return StatsResponse(
|
|
|
|
| 424 |
raise HTTPException(status_code=500, detail=f"Error getting stats: {str(e)}")
|
| 425 |
|
| 426 |
|
| 427 |
+
@app.post("/yearly-wrap", response_model=YearlyWrapResponse)
async def create_yearly_wrap(
    request: Request,
    year: int = Form(..., ge=2000, le=2030),
    api_key: str = Depends(verify_api_key)
):
    """
    Create a yearly financial wrap video from account statements and financial documents.

    Upload your bank statements, investment reports, tax documents, and financial images
    to generate a personalized yearly financial summary video.

    Files are taken from multipart fields whose names start with ``documents``
    (.pdf, .doc, .docx, .txt; max 50MB each) or ``images`` (JPEG, PNG, GIF,
    WebP; max 10MB each). Several files may share the same field name.

    Returns a ``YearlyWrapResponse`` whose ``status`` is ``"completed"`` when a
    video URL was produced, ``"script_only"`` when only the script is
    available, and ``"analysis_only"`` when script generation did not succeed.

    Raises:
        HTTPException: 400 when no file is uploaded, a file type is not
            supported, or a file exceeds its size limit; 500 when a document
            cannot be processed or the financial analysis fails.
    """
    # Function-scope stdlib imports: the handler must not depend on what the
    # module header happens to import (the sibling handler imports os locally).
    import asyncio
    import os
    import tempfile

    max_document_bytes = 50 * 1024 * 1024
    max_image_bytes = 10 * 1024 * 1024
    allowed_exts = (".pdf", ".doc", ".docx", ".txt")
    allowed_types = ("image/jpeg", "image/png", "image/gif", "image/webp")

    # Parse multipart form data manually to handle optional files properly
    form = await request.form()

    documents = []
    images = []

    # multi_items() yields every (name, value) pair. items() keeps only one
    # value per field name, which silently drops all but one file when a
    # client posts several files under the same "documents"/"images" key.
    for field_name, field_value in form.multi_items():
        if not getattr(field_value, "filename", None):
            continue  # plain text field or empty file input
        if field_name.startswith("documents"):
            documents.append(field_value)
        elif field_name.startswith("images"):
            images.append(field_value)

    if not documents and not images:
        raise HTTPException(
            status_code=400,
            detail="Please upload at least one financial document or image"
        )

    # Handle documents
    document_texts = []
    for doc in documents:
        if not doc.filename.lower().endswith(allowed_exts):
            raise HTTPException(
                status_code=400,
                detail=f"Document type not supported: {doc.filename}"
            )
        if doc.size and doc.size > max_document_bytes:
            raise HTTPException(status_code=400, detail="Document too large. Max 50MB.")

        doc_content = await doc.read()
        # The reported size may be missing, so enforce the limit on the bytes
        # that were actually received as well.
        if len(doc_content) > max_document_bytes:
            raise HTTPException(status_code=400, detail="Document too large. Max 50MB.")

        try:
            from rag.ingest import extract_text_from_file

            # The extractor takes a filesystem path, so spill the upload to a
            # temporary file that keeps the original extension.
            suffix = os.path.splitext(doc.filename)[1]
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
                tmp_file.write(doc_content)
                tmp_file_path = tmp_file.name

            try:
                # Text extraction is synchronous; run it off the event loop.
                text = await asyncio.to_thread(extract_text_from_file, Path(tmp_file_path))
                if text:
                    document_texts.append(f"[Document: {doc.filename}]\n{text}")
            finally:
                os.unlink(tmp_file_path)

        except Exception as e:
            raise HTTPException(
                status_code=500,
                detail=f"Error processing document {doc.filename}: {str(e)}"
            ) from e

    # Handle images
    image_data_list = []
    for img in images:
        if img.content_type not in allowed_types:
            raise HTTPException(
                status_code=400,
                detail=f"Image type not supported: {img.filename}"
            )
        if img.size and img.size > max_image_bytes:
            raise HTTPException(status_code=400, detail="Image too large. Max 10MB.")

        img_data = await img.read()
        if len(img_data) > max_image_bytes:
            raise HTTPException(status_code=400, detail="Image too large. Max 10MB.")
        image_data_list.append(img_data)

    # Analyze financial data
    try:
        from rag.utils import analyze_financial_documents

        # Synchronous model call: keep it off the event loop so other
        # requests are still served while it runs.
        analysis = await asyncio.to_thread(
            analyze_financial_documents, document_texts, image_data_list
        )
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Error analyzing financial data: {str(e)}"
        ) from e

    # Generate video script and video
    video_script = None
    video_url = None

    try:
        from rag.utils import create_video_script, generate_yearly_wrap_video

        # Step 1: Create professional video script
        video_script = await asyncio.to_thread(create_video_script, analysis)

        # Step 2: Generate the video. The helper polls the video service with
        # time.sleep(), so it must run in a worker thread.
        # NOTE(review): generate_yearly_wrap_video builds its own script from
        # the analysis, so the script returned to the client is not
        # necessarily the one the video was rendered from.
        video_url = await asyncio.to_thread(generate_yearly_wrap_video, analysis)

        if video_url:
            status = "completed"
            message = f"Yearly financial wrap for {year} created successfully with professional video script and animation!"
        else:
            status = "script_only"
            message = "Yearly financial wrap script created successfully! Video generation is temporarily unavailable due to service limitations, but you have a complete professional script ready."

    except Exception as e:
        error_msg = str(e)
        # Decide by what was actually produced rather than by matching text in
        # the exception message.
        if video_script is not None:
            # Script succeeded, video failed
            status = "script_only"
            message = f"Financial analysis and video script created, but video generation failed: {error_msg}"
        else:
            # Both failed
            status = "analysis_only"
            message = f"Financial analysis completed, but script/video generation failed: {error_msg}"

    return YearlyWrapResponse(
        analysis=analysis,
        video_script=video_script,
        video_url=video_url,
        status=status,
        message=message
    )
|
| 558 |
+
|
| 559 |
+
|
| 560 |
if __name__ == "__main__":
|
| 561 |
import uvicorn
|
| 562 |
uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=True)
|
rag/.env
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
-
GEMINI_API_KEY=
|
| 2 |
PINECONE_API_KEY=pcsk_2BGUcf_CzBnGUF9jP7UTgL6Ned77DVj6zV75RBGyKfFMxVqzw36bAQAc6HiH1nwdMLBoYA
|
| 3 |
PINECONE_INDEX=sabitax
|
|
|
|
| 4 |
|
| 5 |
# Security
|
| 6 |
API_KEY=11e10c46685090a8a464f7c8a8f09cd519b69836935a2c8897b71472e2b74138
|
|
|
|
| 1 |
+
GEMINI_API_KEY=AIzaSyCxyrpiDh8ancmuUd55Ak2hAEPyC9dfzqM
|
| 2 |
PINECONE_API_KEY=pcsk_2BGUcf_CzBnGUF9jP7UTgL6Ned77DVj6zV75RBGyKfFMxVqzw36bAQAc6HiH1nwdMLBoYA
|
| 3 |
PINECONE_INDEX=sabitax
|
| 4 |
+
LUMA_API_KEY=
|
| 5 |
|
| 6 |
# Security
|
| 7 |
API_KEY=11e10c46685090a8a464f7c8a8f09cd519b69836935a2c8897b71472e2b74138
|
rag/__pycache__/ingest.cpython-312.pyc
CHANGED
|
Binary files a/rag/__pycache__/ingest.cpython-312.pyc and b/rag/__pycache__/ingest.cpython-312.pyc differ
|
|
|
rag/__pycache__/utils.cpython-312.pyc
CHANGED
|
Binary files a/rag/__pycache__/utils.cpython-312.pyc and b/rag/__pycache__/utils.cpython-312.pyc differ
|
|
|
rag/utils.py
CHANGED
|
@@ -7,6 +7,10 @@ from dotenv import load_dotenv
|
|
| 7 |
from google import genai
|
| 8 |
from google.genai import types
|
| 9 |
from PIL import Image
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
load_dotenv()
|
| 12 |
|
|
@@ -128,41 +132,73 @@ Respond naturally and conversationally. Introduce yourself as SabiTax in a frien
|
|
| 128 |
role = "User" if msg["role"] == "user" else "You (SabiTax)"
|
| 129 |
history_text += f"{role}: {msg['content']}\n"
|
| 130 |
history_text += "\n"
|
| 131 |
-
|
| 132 |
-
prompt = f"""You are SabiTax,
|
| 133 |
-
|
| 134 |
-
Your
|
| 135 |
-
-
|
| 136 |
-
-
|
| 137 |
-
-
|
| 138 |
-
-
|
| 139 |
-
-
|
| 140 |
-
-
|
| 141 |
-
-
|
| 142 |
-
|
| 143 |
-
Your
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
|
| 150 |
Important rules:
|
| 151 |
-
-
|
| 152 |
-
-
|
| 153 |
-
-
|
| 154 |
-
-
|
| 155 |
-
-
|
| 156 |
-
- If
|
| 157 |
-
- Keep it conversational - use short paragraphs, bullet points when helpful, but write like you're talking
|
| 158 |
-
- If the user is continuing a topic from earlier, acknowledge it and build on the previous conversation
|
| 159 |
|
| 160 |
{history_text}Context from documents:
|
| 161 |
{context}
|
| 162 |
|
| 163 |
Question: {question}
|
| 164 |
|
| 165 |
-
|
| 166 |
|
| 167 |
if image_data:
|
| 168 |
img = Image.open(io.BytesIO(image_data))
|
|
@@ -195,6 +231,228 @@ Respond naturally and conversationally. Explain things like you're helping a fri
|
|
| 195 |
raise Exception("Failed to generate answer after multiple attempts")
|
| 196 |
|
| 197 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 198 |
def clean_text(text: str) -> str:
|
| 199 |
text = text.encode('utf-8', errors='ignore').decode('utf-8')
|
| 200 |
text = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\x9f]', '', text)
|
|
|
|
| 7 |
from google import genai
|
| 8 |
from google.genai import types
|
| 9 |
from PIL import Image
|
| 10 |
+
try:
|
| 11 |
+
from lumaai import LumaAI
|
| 12 |
+
except ImportError:
|
| 13 |
+
LumaAI = None
|
| 14 |
|
| 15 |
load_dotenv()
|
| 16 |
|
|
|
|
| 132 |
role = "User" if msg["role"] == "user" else "You (SabiTax)"
|
| 133 |
history_text += f"{role}: {msg['content']}\n"
|
| 134 |
history_text += "\n"
|
| 135 |
+
|
| 136 |
+
prompt = f"""You are SabiTax, Nigeria's comprehensive tax assistant. You help Nigerians with all aspects of tax compliance, from understanding laws to filing returns and analyzing documents.
|
| 137 |
+
|
| 138 |
+
Your expertise covers:
|
| 139 |
+
- **General Tax Questions**: Rates, deadlines, deductions, tax planning
|
| 140 |
+
- **Tax Calculations**: Personal income tax, company tax, VAT, capital gains tax
|
| 141 |
+
- **Form Guidance**: How to complete and file tax forms (Form A, Form B, etc.)
|
| 142 |
+
- **Compliance Requirements**: What to declare, when to file, penalties
|
| 143 |
+
- **Document Analysis**: Reviewing tax returns, financial statements, receipts
|
| 144 |
+
- **Tax Optimization**: Legal ways to minimize tax liability
|
| 145 |
+
- **Business Taxes**: Company registration, payroll taxes, VAT compliance
|
| 146 |
+
|
| 147 |
+
Your communication style:
|
| 148 |
+
- Professional yet approachable, like a trusted tax consultant
|
| 149 |
+
- Explain complex concepts in simple, everyday Nigerian English
|
| 150 |
+
- Use clear examples: "If you earn N3 million yearly, your tax is calculated as..."
|
| 151 |
+
- Be encouraging and patient with all tax-related questions
|
| 152 |
+
- Always emphasize compliance and accuracy
|
| 153 |
+
|
| 154 |
+
How you handle different types of questions:
|
| 155 |
+
|
| 156 |
+
**For General Tax Questions:**
|
| 157 |
+
- Provide accurate information from current Nigerian tax laws
|
| 158 |
+
- Break down calculations step-by-step
|
| 159 |
+
- Reference specific sections of tax acts
|
| 160 |
+
- Give practical examples relevant to Nigerian taxpayers
|
| 161 |
+
|
| 162 |
+
**For Document Analysis:**
|
| 163 |
+
- Identify the type of document and its tax purpose
|
| 164 |
+
- Extract key tax information (amounts, dates, taxpayer details)
|
| 165 |
+
- Check for compliance with Nigerian tax requirements
|
| 166 |
+
- Point out missing information or potential issues
|
| 167 |
+
|
| 168 |
+
**For Tax Calculations:**
|
| 169 |
+
- Use current tax rates and brackets
|
| 170 |
+
- Show step-by-step calculations
|
| 171 |
+
- Explain deductions and allowances
|
| 172 |
+
- Calculate final tax payable
|
| 173 |
+
|
| 174 |
+
**For Filing Guidance:**
|
| 175 |
+
- Explain which forms to use and when
|
| 176 |
+
- Guide through form completion
|
| 177 |
+
- Highlight common mistakes to avoid
|
| 178 |
+
- Provide filing deadlines and methods
|
| 179 |
+
|
| 180 |
+
Tax-specific guidelines:
|
| 181 |
+
- Always reference current legislation (2025 acts take precedence)
|
| 182 |
+
- Use Nigerian Naira (₦) for amounts
|
| 183 |
+
- Reference FIRS (Federal Inland Revenue Service) procedures
|
| 184 |
+
- Explain tax terms clearly: "Assessable profit means your business income after expenses"
|
| 185 |
+
- Highlight tax incentives and reliefs available to Nigerians
|
| 186 |
+
- Emphasize voluntary compliance over penalties
|
| 187 |
|
| 188 |
Important rules:
|
| 189 |
+
- Base answers on the provided context from indexed tax documents
|
| 190 |
+
- If context doesn't have enough information, clearly state this
|
| 191 |
+
- When analyzing documents, be thorough but practical
|
| 192 |
+
- Suggest consulting a professional tax advisor for complex situations
|
| 193 |
+
- Always promote ethical tax practices and full compliance
|
| 194 |
+
- If something is unclear, ask for clarification rather than assuming
|
|
|
|
|
|
|
| 195 |
|
| 196 |
{history_text}Context from documents:
|
| 197 |
{context}
|
| 198 |
|
| 199 |
Question: {question}
|
| 200 |
|
| 201 |
+
Provide comprehensive, accurate tax guidance. Whether it's a general question, document analysis, or calculation help, explain everything clearly and help the user understand their Nigerian tax obligations."""
|
| 202 |
|
| 203 |
if image_data:
|
| 204 |
img = Image.open(io.BytesIO(image_data))
|
|
|
|
| 231 |
raise Exception("Failed to generate answer after multiple attempts")
|
| 232 |
|
| 233 |
|
| 234 |
+
def analyze_financial_documents(document_texts: list, image_data_list: list) -> dict:
    """Analyze financial documents and extract key financial metrics.

    Args:
        document_texts: Extracted text of each uploaded document (may be
            empty or None).
        image_data_list: Raw bytes of each uploaded image (may be empty or
            None). Decodable images are sent to the model together with the
            prompt.

    Returns:
        A dict with the keys "income", "expenses", "savings", "achievements"
        and "summary". Only "summary" carries model output (truncated to 1000
        characters); the other keys hold fixed status strings. When there is
        nothing to analyze, or the model call fails, the same keys are
        returned with explanatory placeholder text. This function does not
        raise for model errors.
    """
    # Combine all document texts
    all_text = "\n\n".join(document_texts) if document_texts else ""

    # Tell the model that images accompany the request
    if image_data_list:
        all_text += "\n\n[Image Analysis]: Please analyze any financial data visible in the images."

    if not all_text.strip():
        return {
            "income": "No income data found",
            "expenses": "No expense data found",
            "savings": "No savings data found",
            "achievements": "No financial achievements identified",
            "summary": "Unable to analyze financial documents"
        }

    # Only the first 10000 characters of the combined text are sent.
    analysis_prompt = f"""Analyze this financial document and extract key financial information. Focus on:

1. **Income**: Total income, sources, trends
2. **Expenses**: Major expense categories and amounts
3. **Savings**: Savings rate, emergency fund, investments
4. **Achievements**: Financial milestones, debt reduction, investment growth
5. **Summary**: Overall financial health and key insights

Document content:
{all_text[:10000]}

Provide a structured analysis with specific amounts where available. If amounts aren't specified, use descriptive terms like "significant" or "moderate"."""

    try:
        # Actually attach the uploaded images; previously the prompt asked the
        # model to look at images that were never sent.
        decoded_images = []
        for raw_image in image_data_list or []:
            try:
                decoded_images.append(Image.open(io.BytesIO(raw_image)))
            except OSError:
                # Not a decodable image: skip it rather than fail the whole
                # analysis over one bad upload.
                continue

        # Keep the plain-string call when there is nothing to attach.
        contents = [analysis_prompt, *decoded_images] if decoded_images else analysis_prompt

        client = get_gemini_client()
        response = client.models.generate_content(
            model="gemini-2.5-flash",
            contents=contents
        )

        analysis_text = response.text
        if not analysis_text:
            # Report a missing reply explicitly instead of failing on the
            # slice below with an opaque TypeError.
            raise ValueError("model returned an empty response")

        # Simple parsing - the per-category fields are placeholders; the
        # model's answer is returned in "summary" only.
        return {
            "income": "Analysis completed - see detailed summary",
            "expenses": "Analysis completed - see detailed summary",
            "savings": "Analysis completed - see detailed summary",
            "achievements": "Analysis completed - see detailed summary",
            "summary": analysis_text[:1000]  # Truncate for response size
        }

    except Exception as e:
        # Deliberate best-effort: callers always get the same dict shape and
        # the failure reason is reported in "summary".
        return {
            "income": "Error analyzing documents",
            "expenses": "Error analyzing documents",
            "savings": "Error analyzing documents",
            "achievements": "Error analyzing documents",
            "summary": f"Analysis failed: {str(e)}"
        }
|
| 295 |
+
|
| 296 |
+
|
| 297 |
+
def create_video_script(financial_analysis: dict) -> dict:
    """Create a professional video script using Gemini 2.5 Flash.

    Args:
        financial_analysis: Dict that may contain "summary", "income",
            "expenses", "savings" and "achievements"; missing keys are
            replaced with generic default phrases.

    Returns:
        A dict with "scenes" (a non-empty list), "music_mood",
        "overall_theme" and "total_duration". When the model reply is not a
        usable JSON object a single-scene fallback built from the summary is
        returned, and a second fallback is returned if the model call itself
        fails. This function does not raise for model errors.
    """
    import json

    summary = financial_analysis.get("summary", "A year of financial growth and achievements")
    income = financial_analysis.get("income", "Steady income growth")
    expenses = financial_analysis.get("expenses", "Managed expenses effectively")
    savings = financial_analysis.get("savings", "Built savings successfully")
    achievements = financial_analysis.get("achievements", "Achieved financial goals")

    script_prompt = f"""Create a professional year-in-review financial video script based on this user data:

FINANCIAL DATA:
- Summary: {summary}
- Income: {income}
- Expenses: {expenses}
- Savings: {savings}
- Achievements: {achievements}

Create a cinematic 15-20 second video with 4-6 scenes. Output JSON with:

{{
  "scenes": [
    {{
      "scene_number": 1,
      "duration": "3-4 seconds",
      "description": "Brief scene description",
      "video_prompt": "Detailed prompt for video generation AI",
      "voiceover": "Voiceover text for this scene"
    }}
  ],
  "music_mood": "uplifting, motivational, professional",
  "overall_theme": "Financial success and growth",
  "total_duration": "15-20 seconds"
}}

Make it professional, celebratory, and focused on financial achievements. Use Nigerian context where appropriate."""

    try:
        client = get_gemini_client()

        response = client.models.generate_content(
            model="gemini-2.5-flash",
            contents=script_prompt
        )

        # The reply may be None, or may wrap the JSON in a Markdown code fence
        # or surrounding prose. Parse the outermost {...} span so such replies
        # are still accepted instead of always hitting the fallback.
        raw_text = (response.text or "").strip()
        first_brace = raw_text.find("{")
        last_brace = raw_text.rfind("}")
        if 0 <= first_brace < last_brace:
            candidate = raw_text[first_brace:last_brace + 1]
        else:
            candidate = raw_text

        try:
            script_data = json.loads(candidate)
        except json.JSONDecodeError:
            script_data = None

        # Only accept an object that really carries scenes; callers index
        # into script["scenes"].
        if isinstance(script_data, dict):
            scenes = script_data.get("scenes")
            if isinstance(scenes, list) and scenes:
                return script_data

        # Fallback: create structured script from text
        return {
            "scenes": [
                {
                    "scene_number": 1,
                    "duration": "5 seconds",
                    "description": "Financial overview and achievements",
                    "video_prompt": f"Create a professional financial recap video showing: {summary}. Use animated charts, money visualizations, and success indicators.",
                    "voiceover": f"This year brought remarkable financial growth: {summary}"
                }
            ],
            "music_mood": "uplifting, professional",
            "overall_theme": "Financial success story",
            "total_duration": "15 seconds"
        }

    except Exception:
        # Ultimate fallback: deliberate best-effort so that a failed model
        # call still yields a usable script.
        return {
            "scenes": [
                {
                    "scene_number": 1,
                    "duration": "5 seconds",
                    "description": "Financial success visualization",
                    "video_prompt": f"Professional financial recap: {summary}. Show growing charts, money animations, success celebrations.",
                    "voiceover": f"A year of financial achievements: {summary}"
                }
            ],
            "music_mood": "motivational",
            "overall_theme": "Financial growth",
            "total_duration": "10 seconds"
        }
|
| 380 |
+
|
| 381 |
+
|
| 382 |
+
def generate_yearly_wrap_video(financial_analysis: dict) -> str | None:
    """Generate yearly financial wrap video using Gemini + LumaAI pipeline.

    A script is created with ``create_video_script``; the ``video_prompt`` of
    its first usable scene is sent to LumaAI, and the generation is polled
    every 5 seconds for at most 30 attempts.

    Args:
        financial_analysis: Analysis dict; its "summary" is used for the
            default prompt when the script provides no usable scene prompt.

    Returns:
        The video asset reported by LumaAI once the generation completes, or
        None when the failure message indicates the service is unavailable,
        rate limited or out of quota.

    Raises:
        Exception: With a message starting with "Video generation" when the
            SDK or API key is missing, the generation fails, or polling times
            out. Callers match on that prefix.
    """
    # Function-scope stdlib imports so this block does not depend on the
    # module header.
    import os
    import time

    # Step 1: Gemini creates the video script
    script_data = create_video_script(financial_analysis)

    # Step 2: Pick the prompt of the first scene that has one. Model-produced
    # scenes are not guaranteed to contain "video_prompt", so never index
    # blindly; fall back to a prompt built from the analysis summary.
    default_prompt = f"Professional financial recap video showing: {financial_analysis.get('summary', 'Financial achievements')}. Animated charts, money visualizations, success indicators."
    scenes = script_data.get("scenes") if isinstance(script_data, dict) else None
    scene_prompt = default_prompt
    if isinstance(scenes, list):
        for scene in scenes:
            if isinstance(scene, dict) and scene.get("video_prompt"):
                scene_prompt = scene["video_prompt"]
                break

    # Only one scene prompt is used: a single 5 second clip is requested.
    combined_prompt = f"""Create a cinematic financial year-in-review video:

{scene_prompt}

Style: Professional, celebratory, modern financial visualization with animated charts, money effects, and success celebrations."""

    try:
        if LumaAI is None:
            raise Exception("Video generation not available - lumaai not installed")

        # The project's .env template names the key LUMA_API_KEY, while this
        # code historically read LUMAAI_API_KEY; accept either.
        luma_api_key = os.environ.get("LUMAAI_API_KEY") or os.environ.get("LUMA_API_KEY")
        if not luma_api_key:
            raise Exception("LUMAAI_API_KEY (or LUMA_API_KEY) environment variable is not set")

        client = LumaAI(auth_token=luma_api_key)

        generation = client.generations.create(
            prompt=combined_prompt,
            model="ray-2",
            aspect_ratio="16:9",
            resolution="720p",
            duration="5s",
            loop=False
        )

        # Poll for completion
        max_attempts = 30  # 30 polls x 5 s sleep = 2.5 minutes max
        for attempt in range(max_attempts):
            status = client.generations.get(generation.id)

            if status.state == "completed":
                return status.assets.video
            elif status.state == "failed":
                failure_reason = getattr(status, 'failure_reason', 'Unknown failure')
                raise Exception(f"Video generation failed: {failure_reason}")
            elif status.state in ["dreaming", "in_progress", "pending"]:
                # Still processing, continue polling
                pass
            else:
                # Unknown state, log and continue
                print(f"Unknown generation state: {status.state}")

            time.sleep(5)

        raise Exception("Video generation timed out after 2.5 minutes")

    except Exception as e:
        message = str(e)
        lowered = message.lower()
        if "unavailable" in lowered or "rate limit" in lowered or "quota" in lowered:
            # Service temporarily unavailable - return None instead of failing
            return None
        if message.startswith("Video generation"):
            # Already carries the prefix callers look for; re-raise as is
            # instead of producing "Video generation failed: Video generation
            # failed: ...".
            raise
        raise Exception(f"Video generation failed: {message}") from e
|
| 454 |
+
|
| 455 |
+
|
| 456 |
def clean_text(text: str) -> str:
|
| 457 |
text = text.encode('utf-8', errors='ignore').decode('utf-8')
|
| 458 |
text = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\x9f]', '', text)
|
requirements.txt
CHANGED
|
@@ -8,3 +8,6 @@ google-genai
|
|
| 8 |
pydantic
|
| 9 |
python-dotenv
|
| 10 |
Pillow
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
pydantic
|
| 9 |
python-dotenv
|
| 10 |
Pillow
|
| 11 |
+
requests
|
| 12 |
+
docx2txt
|
| 13 |
+
lumaai
|
test_statement.txt
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Bank Statement 2024
|
| 2 |
+
|
| 3 |
+
Income: ₦5,000,000
|
| 4 |
+
Expenses: ₦3,200,000
|
| 5 |
+
Savings: ₦1,800,000
|
| 6 |
+
|
| 7 |
+
Financial Summary:
|
| 8 |
+
- Good year with increased savings
|
| 9 |
+
- Reduced expenses by 10%
|
| 10 |
+
- Investment portfolio grew by 15%
|