Spaces:
Sleeping
Sleeping
import io
import os

import fitz  # PyMuPDF
import pytesseract
from flask import Flask, request, jsonify
from flask_cors import CORS
from PIL import Image
# Module-level Flask application; CORS(app) attaches flask_cors'
# cross-origin handling to it.
app = Flask(__name__)
CORS(app)
def home():
    """Return the plain-text status message for the API.

    NOTE(review): no route decorator is visible on this view in this chunk;
    confirm it is registered with the app.
    """
    status_message = "π Hindi OCR API is running!"
    return status_message
def ocr():
    """Run Hindi+English OCR on an uploaded PDF or image.

    Reads the multipart upload named ``file`` from the current Flask request.
    PDF uploads are rendered page by page and every page image is passed to
    Tesseract; PNG/JPEG uploads are passed to Tesseract directly. The file
    type is chosen from the (case-insensitive) filename extension.

    Returns:
        ``jsonify({"text": ...})`` holding the stripped OCR text on success,
        or a ``(jsonify({"error": ...}), 400)`` tuple when no file was
        uploaded or the extension is not ``.pdf``/``.png``/``.jpg``/``.jpeg``.

    NOTE(review): no route decorator is visible on this view in this chunk;
    confirm it is registered with the app.
    """
    if "file" not in request.files:
        return jsonify({"error": "β No file uploaded"}), 400

    upload = request.files["file"]
    # The filename can be missing and its extension can be upper-case
    # ("SCAN.PDF"), so normalise it before matching on the suffix.
    filename = (upload.filename or "").lower()

    if filename.endswith(".pdf"):
        page_texts = []
        # The context manager closes the document even if rendering or OCR
        # fails part-way through.
        with fitz.open(stream=upload.read(), filetype="pdf") as doc:
            for page in doc:
                # NOTE(review): pages are rendered at get_pixmap()'s default
                # resolution; a higher one may improve OCR accuracy - confirm.
                pix = page.get_pixmap()
                # Keep the rendered page in memory: a shared "temp.png" on
                # disk would be overwritten by concurrent requests and left
                # behind whenever OCR raises.
                with Image.open(io.BytesIO(pix.tobytes("png"))) as img:
                    page_texts.append(
                        pytesseract.image_to_string(img, lang="hin+eng")
                    )
        return jsonify({"text": "\n".join(page_texts).strip()})

    if filename.endswith((".png", ".jpg", ".jpeg")):
        with Image.open(upload.stream) as img:
            text = pytesseract.image_to_string(img, lang="hin+eng")
        return jsonify({"text": text.strip()})

    return jsonify({"error": "β Unsupported file type"}), 400
if __name__ == "__main__":
    # Script entry point: serve on every network interface, port 7860.
    app.run(host="0.0.0.0", port=7860)