bk939448 committed on
Commit
b1c5894
·
verified ·
1 Parent(s): 6298ba6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -7
app.py CHANGED
@@ -4,9 +4,17 @@ from pdf2image import convert_from_bytes
4
  from PIL import Image
5
  import pytesseract
6
  import io
 
7
 
8
  app = FastAPI()
9
 
 
 
 
 
 
 
 
10
  @app.post("/ocr")
11
  async def extract_text(file: UploadFile = File(...)):
12
  filename = file.filename.lower()
@@ -24,18 +32,21 @@ async def extract_text(file: UploadFile = File(...)):
24
  try:
25
  if filename.endswith(".pdf"):
26
  images = convert_from_bytes(contents)
27
- for page in images:
28
- text = pytesseract.image_to_string(page, lang="hin+eng")
29
- extracted_text += text + "\n\n"
 
 
 
 
30
  else:
31
  image = Image.open(io.BytesIO(contents))
32
- text = pytesseract.image_to_string(image, lang="hin+eng")
33
- extracted_text = text
34
 
35
  return {"text": extracted_text.strip() or "⚠️ No text found."}
36
-
37
  except Exception as e:
38
  return JSONResponse(
39
  content={"error": "🚫 Failed to process file", "details": str(e)},
40
  status_code=500
41
- )
 
4
  from PIL import Image
5
  import pytesseract
6
  import io
7
+ from concurrent.futures import ThreadPoolExecutor
8
 
9
  app = FastAPI()
10
 
11
+ # Function that runs OCR on a single page
12
+ def ocr_page(image):
13
+ try:
14
+ return pytesseract.image_to_string(image, lang="hin+eng")
15
+ except Exception as e:
16
+ return f"⚠️ Error: {str(e)}"
17
+
18
  @app.post("/ocr")
19
  async def extract_text(file: UploadFile = File(...)):
20
  filename = file.filename.lower()
 
32
  try:
33
  if filename.endswith(".pdf"):
34
  images = convert_from_bytes(contents)
35
+
36
+ # Send all pages to OCR in parallel
37
+ with ThreadPoolExecutor() as executor:
38
+ results = executor.map(ocr_page, images)
39
+
40
+ extracted_text = "\n\n".join(results)
41
+
42
  else:
43
  image = Image.open(io.BytesIO(contents))
44
+ extracted_text = pytesseract.image_to_string(image, lang="hin+eng")
 
45
 
46
  return {"text": extracted_text.strip() or "⚠️ No text found."}
47
+
48
  except Exception as e:
49
  return JSONResponse(
50
  content={"error": "🚫 Failed to process file", "details": str(e)},
51
  status_code=500
52
+ )