| | from fastapi import FastAPI, HTTPException |
| | from pydantic import BaseModel |
| | import tempfile |
| | import os |
| | from fastapi.responses import JSONResponse |
| | import pytesseract |
| | from pytesseract import Output |
| | from PIL import Image |
| | import requests |
| | from fastapi.routing import APIRouter |
| | from io import BytesIO |
| |
|
# Router exposing the Tesseract-based OCR endpoints; mounted by the main app.
# NOTE(review): "tessaract" is a typo for "tesseract", but this name is the
# module's public API — renaming it would break importers.
tessaract_ocr_router = APIRouter(tags=["OCR"])
| |
|
| |
|
class HighlightRequest(BaseModel):
    """Request body for the POST /highlight endpoint."""

    # URL of the image to download and run OCR on.
    imageUrl: str
    # Phrases to locate in the OCR output; each term may contain several
    # space-separated words that must appear consecutively in the image.
    searchTerms: list[str]
| |
|
| |
|
def _find_term_matches(term: str, words: list) -> list:
    """Return highlight dicts for every occurrence of *term* in *words*.

    Matching is case-insensitive and requires the term's space-separated
    words to appear as consecutive OCR words. Each returned bbox spans from
    the first matched word's top-left to the last matched word's
    bottom-right corner.
    """
    term_words = term.lower().split(" ")
    term_len = len(term_words)
    matches = []
    # Sliding window over every possible start position, so occurrences that
    # begin inside a failed partial match are still found. (The original
    # streaming scan reset its counter without re-checking the current word,
    # missing e.g. term "a b" in the word stream "a a b".)
    for start in range(len(words) - term_len + 1):
        if all(
            words[start + offset]["text"].lower() == term_words[offset]
            for offset in range(term_len)
        ):
            first_bbox = words[start]["bbox"]
            last_bbox = words[start + term_len - 1]["bbox"]
            matches.append(
                {
                    "text": term,
                    "bbox": {
                        "x0": first_bbox["x0"],
                        "y0": first_bbox["y0"],
                        "x1": last_bbox["x1"],
                        "y1": last_bbox["y1"],
                    },
                }
            )
    return matches


@tessaract_ocr_router.post("/highlight")
async def highlight(request: HighlightRequest):
    """OCR the image at ``request.imageUrl`` and return bounding boxes for
    every occurrence of each requested search term.

    Returns:
        JSONResponse with ``searchTerms`` (echoed) and ``highlights`` (a list
        of ``{"text": term, "bbox": {x0, y0, x1, y1}}`` dicts).

    Raises:
        HTTPException 400: missing/invalid input, or the image download failed.
        HTTPException 500: any other failure while processing the image.
    """
    image_url = request.imageUrl
    search_terms = request.searchTerms

    if not image_url or not isinstance(search_terms, list) or len(search_terms) == 0:
        raise HTTPException(
            status_code=400, detail="imageUrl and searchTerms are required"
        )

    try:
        # Timeout so a hung upstream host cannot block the worker forever
        # (the original call had no timeout, which requests never applies
        # by default).
        response = requests.get(image_url, timeout=30)
        if response.status_code != 200:
            raise HTTPException(status_code=400, detail="Failed to download image")

        # Close the PIL image promptly once the OCR data is extracted.
        with Image.open(BytesIO(response.content)) as image:
            ocr_data = pytesseract.image_to_data(
                image, lang="eng", output_type=Output.DICT
            )

        # Collect every non-empty OCR word together with its bounding box.
        words = [
            {
                "text": ocr_data["text"][i],
                "bbox": {
                    "x0": ocr_data["left"][i],
                    "y0": ocr_data["top"][i],
                    "x1": ocr_data["left"][i] + ocr_data["width"][i],
                    "y1": ocr_data["top"][i] + ocr_data["height"][i],
                },
            }
            for i in range(len(ocr_data["text"]))
            if ocr_data["text"][i].strip() != ""
        ]

        highlights = []
        for term in search_terms:
            highlights.extend(_find_term_matches(term, words))

        return JSONResponse(
            content={"searchTerms": search_terms, "highlights": highlights}
        )

    except HTTPException:
        # Let deliberate HTTP errors (e.g. the 400 for a failed download)
        # propagate instead of being swallowed by the generic handler below,
        # which previously converted them into 500s.
        raise
    except Exception as e:
        # Bug fix: the original code *returned* the HTTPException, which
        # FastAPI would serialize as a 200 response body; it must be raised.
        raise HTTPException(
            status_code=500,
            detail=f"An error occurred while processing the image: {str(e)}",
        ) from e
| |
|