Spaces:
Build error
Build error
import os

# These must be assigned BEFORE matplotlib is imported: matplotlib resolves
# its config/cache directory (MPLCONFIGDIR) while it is being imported, so an
# assignment made after the import is silently ignored.
# NOTE(review): PADDLEOCR_HOME is presumably meant to point paddleocr at the
# bundled models; the explicit *_model_dir arguments below already do that --
# confirm the variable is actually consulted by the installed version.
os.environ["PADDLEOCR_HOME"] = "/app/paddleocr_models"
os.environ["MPLCONFIGDIR"] = "/app/tmp/mpl_config"

import shutil  # noqa: E402

import cv2  # noqa: E402
import matplotlib.pyplot as plt  # noqa: E402
import numpy as np  # noqa: E402
import pandas as pd  # noqa: E402
from fastapi import FastAPI, UploadFile, File  # noqa: E402
from fastapi.middleware.cors import CORSMiddleware  # noqa: E402
from fastapi.responses import FileResponse, JSONResponse  # noqa: E402
from paddleocr import PaddleOCR  # noqa: E402

app = FastAPI()

# NOTE(review): a wildcard origin combined with allow_credentials=True is
# discouraged by the FastAPI CORS documentation; list the real front-end
# origins explicitly if cookies/credentials are needed, otherwise disable
# credentials. Values are left unchanged here to avoid breaking clients.
app.add_middleware(
    CORSMiddleware,
    allow_origins=['*'],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Initialize PaddleOCR once at import time, using the detection, recognition
# and angle-classification models stored under /app/paddleocr_models.
ocr = PaddleOCR(
    det_model_dir='/app/paddleocr_models/det',
    rec_model_dir='/app/paddleocr_models/rec',
    cls_model_dir='/app/paddleocr_models/cls',
    use_angle_cls=True,
    lang='en',
)
def read_root():
    """Return a fixed greeting payload as a JSON response."""
    payload = {"message": "Hello World"}
    return JSONResponse(payload)
# Load Image
def load_image(file: UploadFile = File(...)):
    """Run OCR on an uploaded image and save an annotated copy of it.

    The upload is written to a uniquely named temporary file under
    ``uploads/``, decoded with OpenCV, and passed to the module-level ``ocr``
    engine. Every recognized line is then drawn onto the image (red outline,
    blue text) and the figure is saved as ``uploads/result.png``. The
    temporary upload is removed whether or not processing succeeds.

    Args:
        file: The uploaded image file.

    Returns:
        A ``JSONResponse`` with the keys ``message``, ``image_path``
        (always ``"result.png"``), ``extracted_text`` (``(text, score)``
        pairs) and ``text`` (all recognized text joined by single spaces).
        If the upload cannot be decoded as an image, a 400 response that
        carries only ``message`` is returned instead.
    """
    import uuid  # local import: only this function needs it

    os.makedirs("uploads", exist_ok=True)

    # Never use the client-supplied filename as a path: it may contain
    # directory components ("../") or collide with "result.png". Only a
    # plain alphanumeric extension is carried over to the temporary name.
    extension = os.path.splitext(os.path.basename(file.filename or ""))[1]
    if not extension[1:].isalnum():
        extension = ""
    image_path = f"uploads/upload_{uuid.uuid4().hex}{extension}"

    try:
        with open(image_path, "wb") as f:
            shutil.copyfileobj(file.file, f)

        # cv2.imread signals an unreadable/unsupported file by returning None.
        image = cv2.imread(image_path)
        if image is None:
            return JSONResponse(
                {"message": "Uploaded file could not be read as an image"},
                status_code=400,
            )
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # OCR Processing
        ocr_results = ocr.ocr(image_path)
    finally:
        # The pixels are already in memory, so the temporary upload can go
        # even when decoding or OCR raised.
        if os.path.exists(image_path):
            os.remove(image_path)

    # Assumes the PaddleOCR 2.x result layout: one entry per page, each line
    # being [box_points, (text, score)]. The page entry may be None (or the
    # list empty) when no text was detected -- treat that as "no lines".
    page = (ocr_results[0] if ocr_results else None) or []

    texts = [line[1][0] for line in page]
    boxes = [np.array(line[0]) for line in page]
    # float() keeps the scores JSON-serializable whatever numeric type the
    # OCR engine hands back.
    scores = [float(line[1][1]) for line in page]

    final_text = list(zip(texts, scores))
    collected_text = " ".join(texts).strip()

    # Print Extracted Text
    print("🔹 Extracted Text from Invoice:")
    for text, score in final_text:
        print(f"{text} (Confidence: {score:.2f})")

    # Display Image with OCR Text Overlay. An explicit Figure/Axes pair is
    # used (rather than pyplot's implicit "current figure") and the figure is
    # closed in a finally block so it is released even if drawing fails.
    # NOTE(review): every request writes the same uploads/result.png, so
    # concurrent requests overwrite each other's output.
    fig, ax = plt.subplots(figsize=(10, 10))
    try:
        ax.imshow(image)
        for text, box in zip(texts, boxes):
            # Close the quadrilateral by repeating its first corner.
            outline = np.vstack([box[:4], box[:1]])
            ax.plot(outline[:, 0], outline[:, 1], 'r-')
            ax.text(box[0][0], box[0][1], text, color='blue', fontsize=8,
                    fontname='Poppins')
        ax.axis("off")
        fig.tight_layout(pad=2.0)
        fig.savefig("uploads/result.png", bbox_inches='tight')
    finally:
        plt.close(fig)

    return JSONResponse({
        "message": "Image processed successfully",
        "image_path": "result.png",
        "extracted_text": final_text,
        "text": collected_text,
    })
def get_image(input_path: str):
    """Serve a file from the ``uploads/`` directory.

    Args:
        input_path: Path of the requested file, relative to ``uploads/``.

    Returns:
        A ``FileResponse`` for the file, or a 404 ``JSONResponse`` with the
        message ``"Image not found"`` when the path does not name a regular
        file inside ``uploads/``.
    """
    not_found = JSONResponse({"message": "Image not found"}, status_code=404)

    uploads_dir = os.path.realpath("uploads")
    try:
        # Resolve symlinks and ".." so the containment check below sees the
        # path that would really be opened.
        candidate = os.path.realpath(os.path.join(uploads_dir, input_path))
        inside_uploads = os.path.commonpath([uploads_dir, candidate]) == uploads_dir
    except ValueError:
        # Raised for embedded NUL bytes or paths on a different drive.
        return not_found

    # input_path is caller-controlled: refuse anything that escapes uploads/
    # and anything that is not a regular file (e.g. a directory).
    if not inside_uploads or not os.path.isfile(candidate):
        return not_found
    return FileResponse(candidate)
# NOTE(review): module-level statement -- it runs once when the module is
# imported, not after each processed request; confirm this is intended.
print("\n🔹 Processing complete! Annotated image and extracted data saved.")