Spaces:
Runtime error
Runtime error
Commit
·
f8b25ce
1
Parent(s):
5dd63cb
Add config endpoint
Browse files- KTP.jpg +0 -0
- app/__pycache__/dependencies.cpython-310.pyc +0 -0
- app/__pycache__/main.cpython-310.pyc +0 -0
- app/api/v1/endpoints/__pycache__/auth.cpython-310.pyc +0 -0
- app/api/v1/endpoints/__pycache__/config.cpython-310.pyc +0 -0
- app/api/v1/endpoints/__pycache__/ocr.cpython-310.pyc +0 -0
- app/api/v1/endpoints/__pycache__/ocrtemplate.cpython-310.pyc +0 -0
- app/api/v1/endpoints/__pycache__/user.cpython-310.pyc +0 -0
- app/api/v1/endpoints/config.py +37 -0
- app/api/v1/endpoints/ocr.py +101 -0
- app/api/v1/endpoints/ocrtemplate.py +49 -0
- app/api/v1/endpoints/user.py +6 -6
- app/core/__pycache__/config.cpython-310.pyc +0 -0
- app/core/__pycache__/database.cpython-310.pyc +0 -0
- app/core/__pycache__/security.cpython-310.pyc +0 -0
- app/core/config.py +1 -1
- app/core/creamodel.py +12 -0
- app/crud/__pycache__/ocr.cpython-310.pyc +0 -0
- app/crud/__pycache__/ocrtemplate.cpython-310.pyc +0 -0
- app/crud/__pycache__/users.cpython-310.pyc +0 -0
- app/crud/ocr.py +156 -0
- app/crud/ocrtemplate.py +73 -0
- app/crud/users.py +5 -2
- app/db/__pycache__/base.cpython-310.pyc +0 -0
- app/db/models/__pycache__/config.cpython-310.pyc +0 -0
- app/db/models/config.py +12 -0
- app/main.py +11 -3
- app/models/__pycache__/bpjs.cpython-310.pyc +0 -0
- app/models/__pycache__/ocrtemplate.cpython-310.pyc +0 -0
- app/models/__pycache__/token.cpython-310.pyc +0 -0
- app/models/__pycache__/users.cpython-310.pyc +0 -0
- app/models/ocrtemplate.py +12 -0
- app/models/users.py +2 -0
KTP.jpg
ADDED
|
app/__pycache__/dependencies.cpython-310.pyc
CHANGED
|
Binary files a/app/__pycache__/dependencies.cpython-310.pyc and b/app/__pycache__/dependencies.cpython-310.pyc differ
|
|
|
app/__pycache__/main.cpython-310.pyc
CHANGED
|
Binary files a/app/__pycache__/main.cpython-310.pyc and b/app/__pycache__/main.cpython-310.pyc differ
|
|
|
app/api/v1/endpoints/__pycache__/auth.cpython-310.pyc
CHANGED
|
Binary files a/app/api/v1/endpoints/__pycache__/auth.cpython-310.pyc and b/app/api/v1/endpoints/__pycache__/auth.cpython-310.pyc differ
|
|
|
app/api/v1/endpoints/__pycache__/config.cpython-310.pyc
ADDED
|
Binary file (984 Bytes). View file
|
|
|
app/api/v1/endpoints/__pycache__/ocr.cpython-310.pyc
ADDED
|
Binary file (2.87 kB). View file
|
|
|
app/api/v1/endpoints/__pycache__/ocrtemplate.cpython-310.pyc
ADDED
|
Binary file (2.15 kB). View file
|
|
|
app/api/v1/endpoints/__pycache__/user.cpython-310.pyc
CHANGED
|
Binary files a/app/api/v1/endpoints/__pycache__/user.cpython-310.pyc and b/app/api/v1/endpoints/__pycache__/user.cpython-310.pyc differ
|
|
|
app/api/v1/endpoints/config.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter, Depends, HTTPException
|
| 2 |
+
from app.db.models.config import *
|
| 3 |
+
from app.core.config import settings
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
router = APIRouter()
|
| 7 |
+
|
| 8 |
+
@router.get("/", response_model=ConfigUpdateRequest)
async def get_config():
    """Return the live application settings.

    The module-level ``settings`` object is handed back as-is and serialised
    by FastAPI through ``ConfigUpdateRequest``.

    NOTE(review): the response model includes SECRET_KEY and MONGO_DETAILS and
    this route declares no authentication dependency - confirm that is intended.
    """
    current_settings = settings
    return current_settings
|
| 11 |
+
|
| 12 |
+
@router.put("/", response_model=ConfigUpdateRequest)
async def update_config(config: ConfigUpdateRequest):
    """Copy the supplied configuration values onto the live settings object.

    A field is skipped when it is ``None`` or equal to its placeholder value
    (``"string"`` for the text fields, ``0`` for the token lifetime);
    presumably those are the example values an API client sends for untouched
    fields - TODO confirm.

    Args:
        config: Partial configuration; every field is optional.

    Returns:
        The module-level ``settings`` object after the updates were applied.
    """
    # (settings attribute, value that means "leave this one alone")
    placeholders = (
        ("MONGO_DETAILS", "string"),
        ("MongoDB_NAME", "string"),
        ("COLLECTION_NAMES", "string"),
        ("SECRET_KEY", "string"),
        ("ALGORITHM", "string"),
        ("ACCESS_TOKEN_EXPIRE_MINUTES", 0),
    )
    for attribute, placeholder in placeholders:
        incoming = getattr(config, attribute)
        if incoming is None or incoming == placeholder:
            continue
        setattr(settings, attribute, incoming)

    return settings
|
| 36 |
+
|
| 37 |
+
|
app/api/v1/endpoints/ocr.py
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# app/api/v1/endpoints/ocr.py
|
| 2 |
+
from fastapi import APIRouter, Depends, HTTPException
|
| 3 |
+
from fastapi import FastAPI, File, UploadFile, HTTPException
|
| 4 |
+
from fastapi.responses import JSONResponse
|
| 5 |
+
from fastapi import Form
|
| 6 |
+
from app.models.users import *
|
| 7 |
+
from app.crud.users import *
|
| 8 |
+
from app.crud.ocr import *
|
| 9 |
+
from app.crud.ocrtemplate import *
|
| 10 |
+
from app.models.ocrtemplate import *
|
| 11 |
+
from app.core.security import get_password_hash
|
| 12 |
+
from app.dependencies import get_current_user
|
| 13 |
+
|
| 14 |
+
from typing import List
|
| 15 |
+
|
| 16 |
+
import os
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
router = APIRouter()
|
| 20 |
+
|
| 21 |
+
@router.post("/")
async def upload_file(file: UploadFile = File(...)):
    """Run OCR on a single uploaded image and return the formatted text.

    The upload is written to a private temporary file whose name is chosen by
    the ``tempfile`` module. Only the extension of the client-supplied filename
    is reused, so a crafted name such as ``../../etc/x`` can no longer steer
    the write outside the temp directory. The temporary file is always removed.

    Args:
        file: The uploaded image.

    Returns:
        A ``JSONResponse`` with the OCR output, or ``{"error": ...}`` with
        status 500 when anything fails.
    """
    import tempfile

    temp_path = None
    try:
        # Never trust the uploaded name for the path; keep just its suffix.
        suffix = os.path.splitext(os.path.basename(file.filename or ""))[1]
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as buffer:
            temp_path = buffer.name
            buffer.write(await file.read())

        # Perform OCR on the saved image
        formatted_output = await do_ocr_tesseract(temp_path)

        return JSONResponse(content=formatted_output)

    except Exception as e:
        return JSONResponse(content={"error": str(e)}, status_code=500)

    finally:
        # Clean up even when OCR raised.
        if temp_path is not None and os.path.exists(temp_path):
            os.remove(temp_path)
|
| 36 |
+
|
| 37 |
+
@router.post("/process")
async def upload_files(files: List[UploadFile] = File(...), template_name: str = Form(...), current_user: User = Depends(get_current_user)):
    """OCR each uploaded file, parse it with a template and store the result.

    Every upload is written to a temporary file named by the ``tempfile``
    module (the client-supplied filename only contributes its extension, which
    prevents path traversal), and that file is removed in a ``finally`` block
    so a failing OCR or database call no longer leaves it behind.

    Args:
        files: Uploaded images.
        template_name: Name of the template used to parse the OCR text.
        current_user: Authenticated user; ``current_user['user_id']`` is
            stored with the extracted data.

    Returns:
        A ``JSONResponse`` holding the record saved for the LAST processed
        file (unchanged from the previous behaviour), or ``{"error": ...}``
        with status 500 when anything fails.
    """
    import tempfile

    try:
        for file in files:
            filepath = None
            try:
                suffix = os.path.splitext(os.path.basename(file.filename or ""))[1]
                with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as buffer:
                    filepath = buffer.name
                    buffer.write(await file.read())

                # Perform OCR on the saved image
                formatted_output = await do_ocr_tesseract(filepath)

                # Parse the output
                parsed_data = await parse_ocr_output(formatted_output, template_name)

                save_data = await create_data_from_template(template_name, parsed_data, current_user['user_id'])
            finally:
                if filepath is not None and os.path.exists(filepath):
                    os.remove(filepath)

        return JSONResponse(content=save_data.dict())

    except Exception as e:
        return JSONResponse(content={"error": str(e)}, status_code=500)
|
| 64 |
+
|
| 65 |
+
def format_string(input_string: str) -> str:
    """Remove the whitespace surrounding every colon-separated segment.

    The text is cut at each ``:``, each piece is stripped, and the pieces are
    glued back together with a bare ``:``.
    """
    return ':'.join(segment.strip() for segment in input_string.split(':'))
|
| 76 |
+
|
| 77 |
+
async def parse_ocr_output(raw_output: str, template_name: str) -> Dict[str, str]:
    """Extract the template's fields from raw OCR text.

    Each OCR line is normalised with ``format_string`` and compared against
    the values of ``template.fields``; when a line starts with ``"<value>:"``
    the remainder of the line is stored under that value.

    Args:
        raw_output: Text produced by the OCR step.
        template_name: Name of the template to look up.

    Returns:
        A mapping of field label to extracted text, or a single-key
        ``{"error": ...}`` dict when an input is missing or the template
        cannot be found (previously an unknown template raised
        ``AttributeError``).
    """
    if raw_output is None:
        return {"error": "No output to parse"}
    if template_name is None:
        return {"error": "No template provided"}

    template = await get_template_by_name(template_name)
    if template is None:
        return {"error": "Template not found"}

    # Only the values of the mapping are matched; compute them once.
    labels = list(template.fields.values())

    parsed_data = {}
    for line in raw_output.splitlines():
        line = format_string(line)
        print(f"Processing line: {line}")
        for label in labels:
            if line.startswith(label + ":"):
                parsed_data[label] = line.split(':', 1)[1].strip()
    return parsed_data
|
app/api/v1/endpoints/ocrtemplate.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter, HTTPException, Depends
|
| 2 |
+
from typing import List
|
| 3 |
+
from app.models.ocrtemplate import OCRTemplate
|
| 4 |
+
from app.models.users import User
|
| 5 |
+
from app.crud.ocrtemplate import *
|
| 6 |
+
from app.dependencies import get_current_user # Assume this dependency retrieves the current user # Assume you have a database dependency
|
| 7 |
+
|
| 8 |
+
router = APIRouter()
|
| 9 |
+
|
| 10 |
+
# In-memory storage for simplicity; replace with your database logic
|
| 11 |
+
templates = {}
|
| 12 |
+
|
| 13 |
+
@router.post("/", response_model=OCRTemplate)
async def create_OCR_template(template: OCRTemplate, current_user: User = Depends(get_current_user)):
    """Create a template for the authenticated user and return it.

    The work is delegated to ``create_template`` with the caller's
    ``user_id``.
    """
    owner_id = current_user['user_id']
    created = await create_template(template, owner_id)
    return created
|
| 18 |
+
|
| 19 |
+
@router.get("/", response_model=List[Union[OCRTemplateInDB, dict]])
async def get_OCR_templates_for_user(template_name:Optional[bool] = False, current_user: User = Depends(get_current_user)):
    """List the templates that belong to the authenticated user.

    ``template_name`` is a flag that is passed straight through to
    ``get_all_templates_by_user_id`` together with the caller's ``user_id``.
    """
    owner_id = current_user['user_id']
    return await get_all_templates_by_user_id(owner_id, template_name)
|
| 24 |
+
|
| 25 |
+
@router.get("/{template_name}", response_model=OCRTemplate)
async def get_template(template_name: str, current_user: dict = Depends(get_current_user)):
    """Fetch one of the authenticated user's templates by name.

    Raises:
        HTTPException: 404 when the lookup returns nothing.
    """
    found = await get_template_by_name_and_user(template_name, current_user['user_id'])
    if found:
        return found
    raise HTTPException(status_code=404, detail="Template not found")
|
| 32 |
+
|
| 33 |
+
@router.put("/templates", response_model=OCRTemplate)
async def update_template_endpoint(template: OCRTemplate, user: str = Depends(get_current_user)):
    """Update the authenticated user's template named ``template.template_name``.

    Raises:
        HTTPException: 404 when ``update_template`` returns nothing.
    """
    print("Updating template:", template)
    owner_id = user['user_id']
    print("User ID:", owner_id)

    result = await update_template(template.template_name, template, owner_id)
    if result:
        return result
    raise HTTPException(status_code=404, detail="Template not found or could not be updated")
|
| 41 |
+
|
| 42 |
+
@router.delete("/{template_name}")
async def delete_template_endpoint(template_name: str, current_user: str = Depends(get_current_user)):
    """Delete one of the authenticated user's templates by name.

    Args:
        template_name: Name of the template to remove.
        current_user: Authenticated user; ``current_user['user_id']`` scopes
            the deletion.

    Returns:
        ``{"detail": "<template_name> template deleted"}`` on success.

    Raises:
        HTTPException: 404 when ``delete_template`` returns nothing.
    """
    user_id = current_user['user_id']

    deleted_template = await delete_template(template_name, user_id)
    if not deleted_template:
        raise HTTPException(status_code=404, detail="Template not found or could not be deleted")
    # The message previously read "<name> templatedeleted" (missing space).
    return {"detail": f"{template_name} template deleted"}
|
app/api/v1/endpoints/user.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
# app/api/v1/endpoints/user.py
|
| 2 |
from fastapi import APIRouter, Depends, HTTPException
|
| 3 |
-
from app.models
|
| 4 |
from app.crud.users import *
|
| 5 |
from app.core.security import get_password_hash
|
| 6 |
from app.dependencies import get_current_user
|
|
@@ -8,7 +8,7 @@ from app.dependencies import get_current_user
|
|
| 8 |
router = APIRouter()
|
| 9 |
|
| 10 |
@router.post("/")
|
| 11 |
-
async def register_user(user: User):
|
| 12 |
|
| 13 |
db_user = await get_user_by_username(user.username)
|
| 14 |
if db_user:
|
|
@@ -20,12 +20,12 @@ async def register_user(user: User):
|
|
| 20 |
|
| 21 |
user_in_db = UserInDB(**user.dict(), hashed_password=get_password_hash(user.password))
|
| 22 |
|
| 23 |
-
|
| 24 |
|
| 25 |
-
return
|
| 26 |
|
| 27 |
-
@router.get("/me/", response_model=User)
|
| 28 |
-
async def read_users_me(current_user: User = Depends(get_current_user)):
|
| 29 |
print("Current user:", current_user)
|
| 30 |
|
| 31 |
|
|
|
|
| 1 |
# app/api/v1/endpoints/user.py
|
| 2 |
from fastapi import APIRouter, Depends, HTTPException
|
| 3 |
+
from app.models import users
|
| 4 |
from app.crud.users import *
|
| 5 |
from app.core.security import get_password_hash
|
| 6 |
from app.dependencies import get_current_user
|
|
|
|
| 8 |
router = APIRouter()
|
| 9 |
|
| 10 |
@router.post("/")
|
| 11 |
+
async def register_user(user: users.User):
|
| 12 |
|
| 13 |
db_user = await get_user_by_username(user.username)
|
| 14 |
if db_user:
|
|
|
|
| 20 |
|
| 21 |
user_in_db = UserInDB(**user.dict(), hashed_password=get_password_hash(user.password))
|
| 22 |
|
| 23 |
+
user = await create_user(user_in_db)
|
| 24 |
|
| 25 |
+
return user
|
| 26 |
|
| 27 |
+
@router.get("/me/", response_model=users.User)
|
| 28 |
+
async def read_users_me(current_user: users.User = Depends(get_current_user)):
|
| 29 |
print("Current user:", current_user)
|
| 30 |
|
| 31 |
|
app/core/__pycache__/config.cpython-310.pyc
CHANGED
|
Binary files a/app/core/__pycache__/config.cpython-310.pyc and b/app/core/__pycache__/config.cpython-310.pyc differ
|
|
|
app/core/__pycache__/database.cpython-310.pyc
CHANGED
|
Binary files a/app/core/__pycache__/database.cpython-310.pyc and b/app/core/__pycache__/database.cpython-310.pyc differ
|
|
|
app/core/__pycache__/security.cpython-310.pyc
CHANGED
|
Binary files a/app/core/__pycache__/security.cpython-310.pyc and b/app/core/__pycache__/security.cpython-310.pyc differ
|
|
|
app/core/config.py
CHANGED
|
@@ -12,7 +12,7 @@ class Settings(BaseSettings):
|
|
| 12 |
COLLECTION_NAMES: list = ["users", "files", "templates", "extracted data", "external credentials"]
|
| 13 |
SECRET_KEY: str = os.getenv("SECRET_KEY")
|
| 14 |
ALGORITHM: str = "HS256"
|
| 15 |
-
ACCESS_TOKEN_EXPIRE_MINUTES: int =
|
| 16 |
|
| 17 |
settings = Settings()
|
| 18 |
|
|
|
|
| 12 |
COLLECTION_NAMES: list = ["users", "files", "templates", "extracted data", "external credentials"]
|
| 13 |
SECRET_KEY: str = os.getenv("SECRET_KEY")
|
| 14 |
ALGORITHM: str = "HS256"
|
| 15 |
+
ACCESS_TOKEN_EXPIRE_MINUTES: int = 100
|
| 16 |
|
| 17 |
settings = Settings()
|
| 18 |
|
app/core/creamodel.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel, create_model
|
| 2 |
+
from typing import Dict
|
| 3 |
+
|
| 4 |
+
def create_pydantic_model(name: str, variable_dict: Dict[str, str]) -> BaseModel:
    """Build a pydantic model class with one required ``str`` field per value.

    Args:
        name: Name of the generated model class.
        variable_dict: Mapping whose VALUES become the field names.

    Returns:
        The model class produced by ``create_model``; it always carries a
        required ``user_id`` string field.
    """
    fields = {key: (str, ...) for key in variable_dict.values()}
    # Add user_id through the dict: passing it as a separate keyword raised
    # "got multiple values for keyword argument" when a value was "user_id".
    fields["user_id"] = (str, ...)
    return create_model(name, **fields)
|
| 7 |
+
|
| 8 |
+
def parse_data(model: "BaseModel", data_str: str, user_id:str) -> Dict[str, str]:
    """Parse ``key: value`` lines into ``model`` and return its dict form.

    Lines without ``": "`` are ignored. Each remaining line is split at the
    FIRST ``": "`` only, so a value that itself contains ``": "`` (for
    example ``Time: 10: 30``) is kept whole instead of being truncated.

    Args:
        model: Callable (the model class) invoked with the parsed pairs as
            keyword arguments; its result must expose ``.dict()``.
        data_str: Multi-line text to parse.
        user_id: Stored under the ``user_id`` key before validation.

    Returns:
        ``model(**parsed).dict()``.
    """
    data_dict = {}
    for line in data_str.splitlines():
        key, separator, value = line.partition(': ')
        if separator:
            data_dict[key] = value
    data_dict['user_id'] = user_id
    return model(**data_dict).dict()
|
app/crud/__pycache__/ocr.cpython-310.pyc
ADDED
|
Binary file (4.56 kB). View file
|
|
|
app/crud/__pycache__/ocrtemplate.cpython-310.pyc
ADDED
|
Binary file (2.94 kB). View file
|
|
|
app/crud/__pycache__/users.cpython-310.pyc
CHANGED
|
Binary files a/app/crud/__pycache__/users.cpython-310.pyc and b/app/crud/__pycache__/users.cpython-310.pyc differ
|
|
|
app/crud/ocr.py
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
+
from PIL import Image
|
| 3 |
+
import pytesseract
|
| 4 |
+
import re, cv2
|
| 5 |
+
import imutils
|
| 6 |
+
from concurrent.futures import ThreadPoolExecutor
|
| 7 |
+
from app.models.ocrtemplate import *
|
| 8 |
+
from app.core.database import get_database
|
| 9 |
+
from app.core.config import settings
|
| 10 |
+
from typing import Any
|
| 11 |
+
from fastapi import HTTPException
|
| 12 |
+
from pytesseract import Output
|
| 13 |
+
|
| 14 |
+
import os

# Location of the Tesseract binary. The original code unconditionally pointed
# pytesseract at a Windows install path, which makes every OCR call fail on
# other hosts. Now: an explicit TESSERACT_CMD environment variable wins, the
# Windows path is used only when it actually exists, and otherwise
# pytesseract's own default is left untouched.
_WINDOWS_TESSERACT_CMD = r'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'
_tesseract_cmd = os.getenv("TESSERACT_CMD") or (
    _WINDOWS_TESSERACT_CMD if os.path.exists(_WINDOWS_TESSERACT_CMD) else None
)
if _tesseract_cmd:
    pytesseract.pytesseract.tesseract_cmd = _tesseract_cmd
|
| 15 |
+
|
| 16 |
+
def identify_structure(line):
    """Classify one line of OCR text by its surface shape.

    Returns one of ``'mixed-column'`` (more than one colon), ``'key-value'``
    (one colon, or a leading word followed by whitespace and an alphanumeric
    run), ``'table-header'`` (more than one all-caps word), ``'table-row'``
    (more than one standalone number) or ``'text'``.
    """
    text = line.strip()

    colon_total = text.count(':')
    if colon_total > 1:
        return 'mixed-column'
    if colon_total == 1 or re.match(r'^[A-Za-z]+\s+[A-Za-z0-9]+', text):
        return 'key-value'

    caps_total = len(re.findall(r'\b[A-Z]+\b', text))
    if caps_total > 1:
        return 'table-header'

    # At most one all-caps word remains possible here.
    number_total = len(re.findall(r'\b\d+\b', text))
    return 'table-row' if number_total > 1 else 'text'
|
| 35 |
+
|
| 36 |
+
def format_extracted_text(text):
    """Re-flow raw OCR text line by line according to its detected structure.

    Blank lines are dropped. Each remaining line is classified with
    ``identify_structure``:

    * ``mixed-column`` lines are split on ``:`` and re-emitted as
      ``"key: value"`` pairs taken two pieces at a time;
    * ``key-value`` and ``table-header`` lines are kept as they are (a header
      opens a table);
    * ``table-row`` lines are kept while a table is open;
    * anything else closes an open table (emitting an extra ``"\n"`` entry
      first) and is kept.

    Returns:
        The collected entries joined with newlines.
    """
    stripped = (raw.strip() for raw in text.split('\n'))

    output = []
    inside_table = False

    for entry in filter(None, stripped):
        kind = identify_structure(entry)

        if kind == 'table-row' and inside_table:
            # Rows keep the table open.
            output.append(entry)
            continue

        if kind == 'mixed-column':
            pieces = entry.split(':')
            output.extend(
                f"{pieces[k].strip()}: {pieces[k+1].strip()}"
                for k in range(0, len(pieces) - 1, 2)
            )
        elif kind in ('key-value', 'table-header'):
            output.append(entry)
        else:
            if inside_table:
                output.append("\n")
            output.append(entry)

        # Only a header leaves a table open after this point.
        inside_table = kind == 'table-header'

    return "\n".join(output)
|
| 66 |
+
|
| 67 |
+
def refine_text_formatting(text):
    """Apply three regex clean-ups to ``text``, in order.

    1. collapse every whitespace run to a single space;
    2. turn a full stop followed by whitespace into a full stop and newline;
    3. normalise the spacing around each colon to ``": "``.
    """
    substitutions = (
        (r'\s+', ' '),
        (r'\.\s', '.\n'),
        (r'\s*:\s*', ': '),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    return text
|
| 72 |
+
|
| 73 |
+
def do_ocr(image_path):
    """Run Tesseract on the image at ``image_path`` and format the text.

    The image is opened in a ``with`` block so its file handle is released as
    soon as OCR finishes (it was previously left open).

    Returns:
        The OCR text after ``format_extracted_text``.
    """
    with Image.open(image_path) as image:
        extracted_text = pytesseract.image_to_string(image)
    return format_extracted_text(extracted_text)
|
| 78 |
+
|
| 79 |
+
async def do_ocr_tesseract(image_path):
    """Run the blocking ``do_ocr`` in a worker thread and await its result.

    ``asyncio.to_thread`` uses the event loop's default executor, so a new
    thread pool is no longer created and torn down for every call, and the
    ``get_event_loop`` lookup inside a coroutine is gone.

    Returns:
        The formatted text produced by ``do_ocr``.
    """
    return await asyncio.to_thread(do_ocr, image_path)
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
async def create_data_from_template(template_name:str, fields:Dict[str, str], user_id:str) -> OCRTemplateInDB:
    """Store extracted field values in the ``"extracted data"`` collection.

    Args:
        template_name: Template the values were parsed with.
        fields: Extracted values.
        user_id: Owner of the record.

    Returns:
        The ``OCRTemplateInDB`` instance that was inserted.
    """
    record = OCRTemplateInDB(template_name=template_name, fields=fields, user_id=user_id)
    db = get_database(settings.MongoDB_NAME)
    # The insert result was never used, and the old "if template" guard could
    # not fail for a freshly built model, so both were dropped.
    await db["extracted data"].insert_one(record.dict())
    return record
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def preprocess_image(image: Any) -> Any:
    """Prepare a BGR image for OCR.

    Pipeline: grayscale -> 3x3 Gaussian blur -> inverted Otsu threshold ->
    one pass of morphological opening with a 3x3 rectangle -> invert back.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    smoothed = cv2.GaussianBlur(grayscale, (3, 3), 0)

    # threshold() returns (value, image); only the image is needed.
    _, binary = cv2.threshold(smoothed, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    rect_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    cleaned = cv2.morphologyEx(binary, cv2.MORPH_OPEN, rect_kernel, iterations=1)

    return 255 - cleaned
|
| 111 |
+
|
| 112 |
+
async def detect_rotation(image_path: str) -> Any:
    """Load an image and rotate it by the angle Tesseract's OSD reports.

    Raises:
        HTTPException: 400 when ``cv2.imread`` returns ``None``.

    Returns:
        The image rotated with ``imutils.rotate_bound``.
    """
    bgr_image = cv2.imread(image_path)
    if bgr_image is None:
        raise HTTPException(status_code=400, detail="Image not found or unable to read")

    # OSD is run on an RGB copy; the original BGR array is what gets rotated.
    osd = pytesseract.image_to_osd(
        cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB),
        output_type=Output.DICT,
    )

    report_lines = (
        ("[INFO] detected orientation: {}", "orientation"),
        ("[INFO] rotate by {} degrees to correct", "rotate"),
        ("[INFO] detected script: {}", "script"),
    )
    for message, key in report_lines:
        print(message.format(osd[key]))

    return imutils.rotate_bound(bgr_image, angle=osd["rotate"])
|
| 133 |
+
|
| 134 |
+
async def tesseract_ocr(image_path: str) -> str:
    """OCR an image after orientation correction and preprocessing.

    Steps: ``detect_rotation`` -> ``preprocess_image`` -> Tesseract with
    ``--psm 6`` -> ``format_extracted_text``.
    """
    upright = await detect_rotation(image_path)
    raw_text = pytesseract.image_to_string(preprocess_image(upright), config='--psm 6')
    return format_extracted_text(raw_text)
|
| 146 |
+
# Example usage
|
| 147 |
+
async def main():
    """Example run: OCR ``KTP.jpg`` with both pipelines and print the results."""
    sample = 'KTP.jpg'
    plain_text = await do_ocr_tesseract(sample)
    preprocessed_text = await tesseract_ocr(sample)
    for text in (plain_text, preprocessed_text):
        print(text)
|
| 153 |
+
|
| 154 |
+
# asyncio.run(main())
|
| 155 |
+
|
| 156 |
+
|
app/crud/ocrtemplate.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI, HTTPException, Depends
|
| 2 |
+
from app.core.database import get_database
|
| 3 |
+
from app.models.ocrtemplate import OCRTemplate, OCRTemplateInDB
|
| 4 |
+
from app.core.security import get_password_hash, verify_password
|
| 5 |
+
from bson import ObjectId
|
| 6 |
+
from app.core.config import settings
|
| 7 |
+
from typing import List, Optional, Union
|
| 8 |
+
from pymongo import ReturnDocument
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
async def create_template(template: OCRTemplate, user_id: str):
    """Insert a new template owned by ``user_id``.

    Args:
        template: Template to store; its ``user_id`` is overwritten with the
            ``user_id`` argument.
        user_id: Owner of the template.

    Returns:
        An ``OCRTemplate`` built from the stored values.

    Raises:
        HTTPException: 409 when the user already has a template with this
            name. (This used to be 401, which means "unauthenticated" and
            misreports a naming conflict.)
    """
    existing_template = await get_template_by_name_and_user(template.template_name, user_id)
    if existing_template:
        raise HTTPException(status_code=409, detail="Template already exists")

    template_dict = template.dict()
    template_dict["user_id"] = user_id
    db = get_database(settings.MongoDB_NAME)
    # Insert a copy so the driver's added "_id" key does not leak into the
    # dict used to build the return value.
    await db["templates"].insert_one(dict(template_dict))
    return OCRTemplate(**template_dict)
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
async def get_all_templates_by_user_id(user_id: str, template_name: Optional[bool] = False)-> Union[List[OCRTemplateInDB], List[dict]]:
    """Return up to 1000 templates stored for ``user_id``.

    When ``template_name`` is truthy each entry is a dict holding only the
    ``template_name`` key; otherwise each entry is an ``OCRTemplateInDB``.
    """
    db = get_database(settings.MongoDB_NAME)
    cursor = db["templates"].find({"user_id": user_id})
    documents = await cursor.to_list(1000)

    if not template_name:
        return [OCRTemplateInDB(**document) for document in documents]

    # Names only
    return [{"template_name": document["template_name"]} for document in documents]
|
| 35 |
+
|
| 36 |
+
async def get_template_by_name(template_name: str) -> OCRTemplate:
    """Find the first template with this name, regardless of owner.

    Returns:
        The matching ``OCRTemplate``, or ``None`` when nothing matches.
    """
    collection = get_database(settings.MongoDB_NAME)["templates"]
    document = await collection.find_one({"template_name": template_name})
    return OCRTemplate(**document) if document else None
|
| 42 |
+
|
| 43 |
+
async def get_template_by_name_and_user(template_name: str, user_id: Optional[str]) -> OCRTemplate:
    """Find a template by name and owner.

    The query and the raw document are printed for debugging.

    Returns:
        The matching ``OCRTemplate``, or ``None`` when nothing matches.
    """
    criteria = {"template_name": template_name, "user_id": user_id}
    print("Query:", criteria)

    collection = get_database(settings.MongoDB_NAME)["templates"]
    document = await collection.find_one(criteria)
    print(document)

    if not document:
        return None
    return OCRTemplate(**document)
|
| 52 |
+
|
| 53 |
+
async def update_template(template_name: str, template: OCRTemplate, user_id: str):
    """Replace the fields of the template ``template_name`` owned by ``user_id``.

    Args:
        template_name: Name of the template to update.
        template: New values.
        user_id: Owner; used both to find the document and as the stored
            ``user_id``.

    Returns:
        The updated ``OCRTemplate``, or ``None`` when no document matched.
    """
    query = {"template_name": template_name, "user_id": user_id}
    print("Query:", query)

    # OCRTemplate.user_id is optional and defaults to None. Writing
    # template.dict() unchanged would overwrite the stored owner (with None
    # when the client omitted it), detaching the template from its user, so
    # the owner is pinned to the authenticated user_id.
    changes = template.dict()
    changes["user_id"] = user_id

    db = get_database(settings.MongoDB_NAME)
    updated_template = await db["templates"].find_one_and_update(
        query,
        {"$set": changes},
        return_document=ReturnDocument.AFTER
    )
    if updated_template:
        return OCRTemplate(**updated_template)
    return None
|
| 65 |
+
|
| 66 |
+
async def delete_template(template_name: str, user_id: str):
    """Delete the template ``template_name`` owned by ``user_id``.

    The query is printed for debugging.

    Returns:
        The removed ``OCRTemplate``, or ``None`` when no document matched.
    """
    criteria = {"template_name": template_name, "user_id": user_id}
    print("Query:", criteria)

    collection = get_database(settings.MongoDB_NAME)["templates"]
    removed = await collection.find_one_and_delete(criteria)
    return OCRTemplate(**removed) if removed else None
|
app/crud/users.py
CHANGED
|
@@ -4,20 +4,23 @@ from app.models.users import UserInDB, User
|
|
| 4 |
from app.core.security import get_password_hash, verify_password
|
| 5 |
from bson import ObjectId
|
| 6 |
from app.core.config import settings
|
|
|
|
| 7 |
|
| 8 |
|
| 9 |
async def create_user(user: UserInDB):
|
| 10 |
user_dict = user.dict()
|
|
|
|
|
|
|
| 11 |
user_dict["password"] = get_password_hash(user.password)
|
| 12 |
db = get_database(settings.MongoDB_NAME)
|
| 13 |
result = await db["users"].insert_one(user_dict)
|
| 14 |
-
|
|
|
|
| 15 |
return User(**user_dict)
|
| 16 |
|
| 17 |
async def get_user_by_username(username: str):
|
| 18 |
db = get_database(settings.MongoDB_NAME)
|
| 19 |
user = await db["users"].find_one({"username": username})
|
| 20 |
-
|
| 21 |
return user
|
| 22 |
|
| 23 |
async def get_user_by_email(email: str):
|
|
|
|
| 4 |
from app.core.security import get_password_hash, verify_password
|
| 5 |
from bson import ObjectId
|
| 6 |
from app.core.config import settings
|
| 7 |
+
import uuid
|
| 8 |
|
| 9 |
|
| 10 |
async def create_user(user: UserInDB):
|
| 11 |
user_dict = user.dict()
|
| 12 |
+
user_dict["user_id"] = str(uuid.uuid4())
|
| 13 |
+
print("user_id:", user_dict["user_id"])
|
| 14 |
user_dict["password"] = get_password_hash(user.password)
|
| 15 |
db = get_database(settings.MongoDB_NAME)
|
| 16 |
result = await db["users"].insert_one(user_dict)
|
| 17 |
+
|
| 18 |
+
|
| 19 |
return User(**user_dict)
|
| 20 |
|
| 21 |
async def get_user_by_username(username: str):
|
| 22 |
db = get_database(settings.MongoDB_NAME)
|
| 23 |
user = await db["users"].find_one({"username": username})
|
|
|
|
| 24 |
return user
|
| 25 |
|
| 26 |
async def get_user_by_email(email: str):
|
app/db/__pycache__/base.cpython-310.pyc
CHANGED
|
Binary files a/app/db/__pycache__/base.cpython-310.pyc and b/app/db/__pycache__/base.cpython-310.pyc differ
|
|
|
app/db/models/__pycache__/config.cpython-310.pyc
ADDED
|
Binary file (805 Bytes). View file
|
|
|
app/db/models/config.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI, HTTPException, Depends
|
| 2 |
+
from typing import Optional, List
|
| 3 |
+
from pydantic import BaseModel
|
| 4 |
+
import os
|
| 5 |
+
|
| 6 |
+
class ConfigUpdateRequest(BaseModel):
    """Request/response schema for the configuration values.

    Every field is optional and defaults to ``None``.
    """

    # Database connection
    MONGO_DETAILS: Optional[str] = None
    MongoDB_NAME: Optional[str] = None
    COLLECTION_NAMES: Optional[List[str]] = None

    # Token signing
    SECRET_KEY: Optional[str] = None
    ALGORITHM: Optional[str] = None
    ACCESS_TOKEN_EXPIRE_MINUTES: Optional[int] = None
|
app/main.py
CHANGED
|
@@ -13,13 +13,14 @@ from bson import ObjectId
|
|
| 13 |
from contextlib import asynccontextmanager
|
| 14 |
from pydantic import BaseModel, Field
|
| 15 |
from datetime import timedelta, datetime
|
| 16 |
-
from dotenv import
|
| 17 |
-
from app.api.v1.endpoints import user, auth
|
| 18 |
from app.db.base import *
|
| 19 |
from app.core.auth import *
|
| 20 |
# from app.router.user import *
|
| 21 |
from app.core.database import *
|
| 22 |
|
|
|
|
| 23 |
# Load environment variables from .env file
|
| 24 |
dotenv_values(".env")
|
| 25 |
|
|
@@ -50,16 +51,23 @@ async def lifespan(app: FastAPI):
|
|
| 50 |
logger.error(e)
|
| 51 |
|
| 52 |
app = FastAPI(lifespan=lifespan)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
app.add_middleware(
|
| 54 |
CORSMiddleware,
|
| 55 |
-
allow_origins=
|
| 56 |
allow_credentials=True,
|
| 57 |
allow_methods=["*"],
|
| 58 |
allow_headers=["*"],
|
| 59 |
)
|
| 60 |
|
| 61 |
app.include_router(user.router, prefix='/api/v1/user', tags=["User"])
|
|
|
|
|
|
|
| 62 |
app.include_router(auth.router, tags=["Auth"])
|
|
|
|
| 63 |
|
| 64 |
|
| 65 |
class Destination(BaseModel):
|
|
|
|
| 13 |
from contextlib import asynccontextmanager
|
| 14 |
from pydantic import BaseModel, Field
|
| 15 |
from datetime import timedelta, datetime
|
| 16 |
+
from dotenv import dotenv_values
|
| 17 |
+
from app.api.v1.endpoints import user, auth, ocr, ocrtemplate, config
|
| 18 |
from app.db.base import *
|
| 19 |
from app.core.auth import *
|
| 20 |
# from app.router.user import *
|
| 21 |
from app.core.database import *
|
| 22 |
|
| 23 |
+
|
| 24 |
# Load environment variables from .env file
|
| 25 |
dotenv_values(".env")
|
| 26 |
|
|
|
|
| 51 |
logger.error(e)
|
| 52 |
|
| 53 |
app = FastAPI(lifespan=lifespan)
|
| 54 |
+
# Allow CORS for specific origin with credentials
|
| 55 |
+
origins = [
|
| 56 |
+
"http://localhost:5000",
|
| 57 |
+
]
|
| 58 |
app.add_middleware(
|
| 59 |
CORSMiddleware,
|
| 60 |
+
allow_origins=origins,
|
| 61 |
allow_credentials=True,
|
| 62 |
allow_methods=["*"],
|
| 63 |
allow_headers=["*"],
|
| 64 |
)
|
| 65 |
|
| 66 |
app.include_router(user.router, prefix='/api/v1/user', tags=["User"])
|
| 67 |
+
app.include_router(ocrtemplate.router, prefix='/api/v1/ocrtemplate', tags=["OCR Template"])
|
| 68 |
+
app.include_router(ocr.router, prefix='/api/v1/ocr', tags=["OCR"])
|
| 69 |
app.include_router(auth.router, tags=["Auth"])
|
| 70 |
+
app.include_router(config.router, prefix='/api/v1/config', tags=["Config"])
|
| 71 |
|
| 72 |
|
| 73 |
class Destination(BaseModel):
|
app/models/__pycache__/bpjs.cpython-310.pyc
ADDED
|
Binary file (539 Bytes). View file
|
|
|
app/models/__pycache__/ocrtemplate.cpython-310.pyc
ADDED
|
Binary file (864 Bytes). View file
|
|
|
app/models/__pycache__/token.cpython-310.pyc
CHANGED
|
Binary files a/app/models/__pycache__/token.cpython-310.pyc and b/app/models/__pycache__/token.cpython-310.pyc differ
|
|
|
app/models/__pycache__/users.cpython-310.pyc
CHANGED
|
Binary files a/app/models/__pycache__/users.cpython-310.pyc and b/app/models/__pycache__/users.cpython-310.pyc differ
|
|
|
app/models/ocrtemplate.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel, Field
|
| 2 |
+
from typing import Dict, Optional
|
| 3 |
+
|
| 4 |
+
class OCRTemplate(BaseModel):
    """An OCR template: a name plus a string-to-string ``fields`` mapping.

    ``user_id`` is optional; per its field description, ``None`` marks a
    common template.
    """

    template_name: str
    fields: Dict[str, str]
    user_id: Optional[str] = Field(
        None,
        description="ID of the user who owns the template. None for common templates.",
    )
|
| 8 |
+
|
| 9 |
+
class OCRTemplateInDB(BaseModel):
    """Template-shaped record in which ``user_id`` is required."""

    template_name: str
    fields: Dict[str, str]
    user_id: str  # unlike OCRTemplate, an owner must always be given
|
app/models/users.py
CHANGED
|
@@ -2,8 +2,10 @@
|
|
| 2 |
from pydantic import BaseModel, EmailStr, Field
|
| 3 |
from typing import Optional
|
| 4 |
from datetime import timedelta, datetime
|
|
|
|
| 5 |
|
| 6 |
class User(BaseModel):
|
|
|
|
| 7 |
username: str
|
| 8 |
email: EmailStr
|
| 9 |
password: str
|
|
|
|
| 2 |
from pydantic import BaseModel, EmailStr, Field
|
| 3 |
from typing import Optional
|
| 4 |
from datetime import timedelta, datetime
|
| 5 |
+
import uuid
|
| 6 |
|
| 7 |
class User(BaseModel):
|
| 8 |
+
user_id: str = Field(default_factory=uuid.uuid4())
|
| 9 |
username: str
|
| 10 |
email: EmailStr
|
| 11 |
password: str
|