Spaces:

bk939448
/

hindi-ocr-api

Sleeping

hindi-ocr-api / app.py

badman99dev

🛠️ Updated OCR files with PDF support

279020a 7 months ago

1.25 kB

	import io
	import os

	import fitz  # PyMuPDF
	import pytesseract
	from flask import Flask, request, jsonify
	from flask_cors import CORS
	from PIL import Image

	# WSGI application object; the route decorators in this module register on it.
	app = Flask(import_name=__name__)
	# Attach flask-cors to the app so browser clients on other origins can call it.
	CORS(app)

	@app.route("/")
	def home():
	    """Liveness endpoint: return a fixed banner showing the API is up."""
	    status_message = "🚀 Hindi OCR API is running!"
	    return status_message

	@app.route("/api/ocr", methods=["POST"])
	def ocr():
	    """Run Hindi+English OCR on an uploaded PDF or image.

	    Expects a multipart upload under the form field ``file``. PDFs are
	    rendered page by page and the per-page text is joined with newlines;
	    PNG/JPEG images are recognised directly.

	    Returns:
	        A JSON response ``{"text": ...}`` on success, or
	        ``({"error": ...}, 400)`` when no file was sent or the extension
	        is not one of ``.pdf``, ``.png``, ``.jpg``, ``.jpeg``.
	    """
	    if "file" not in request.files:
	        return jsonify({"error": "❌ No file uploaded"}), 400

	    file = request.files["file"]
	    # The client may omit the filename entirely, and extensions are often
	    # upper-case ("SCAN.PDF"), so normalise before matching on the suffix.
	    filename = (file.filename or "").lower()

	    if filename.endswith(".pdf"):
	        page_texts = []
	        # Closing the document via the context manager releases it even if
	        # rendering or OCR fails part-way through.
	        with fitz.open(stream=file.read(), filetype="pdf") as doc:
	            for page in doc:
	                # NOTE(review): rendered at PyMuPDF's default resolution;
	                # a higher dpi may improve OCR accuracy - confirm before changing.
	                pix = page.get_pixmap()
	                # Encode the render in memory instead of a shared "temp.png":
	                # a fixed path is clobbered by concurrent requests and is
	                # left behind whenever OCR raises.
	                png_bytes = pix.tobytes("png")
	                with Image.open(io.BytesIO(png_bytes)) as img:
	                    page_texts.append(
	                        pytesseract.image_to_string(img, lang="hin+eng")
	                    )
	        return jsonify({"text": "\n".join(page_texts).strip()})

	    if filename.endswith((".png", ".jpg", ".jpeg")):
	        with Image.open(file.stream) as img:
	            text = pytesseract.image_to_string(img, lang="hin+eng")
	        return jsonify({"text": text.strip()})

	    return jsonify({"error": "❌ Unsupported file type"}), 400

	# Start Flask's built-in server only when this file is executed directly,
	# not when it is imported by another module or a WSGI server.
	if __name__ == "__main__":
	    # Bind to all interfaces so the service is reachable from outside the
	    # host/container; 7860 is presumably the port the hosting platform expects.
	    app.run(port=7860, host="0.0.0.0")