sameernotes committed on
Commit
4078a51
·
verified ·
1 Parent(s): 30b75f2

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -166
app.py DELETED
@@ -1,166 +0,0 @@
1
- import os
2
- import io
3
- import sys
4
- import cv2
5
- import base64
6
- import pickle
7
- import numpy as np
8
- import tensorflow as tf
9
- import matplotlib.pyplot as plt
10
- import matplotlib.font_manager as fm
11
- import tempfile
12
- import sakshi_ocr
13
-
14
- from fastapi import FastAPI, File, UploadFile, HTTPException
15
- from fastapi.responses import HTMLResponse, JSONResponse
16
-
# Paths to the assets the app needs at startup (update these if necessary).
MODEL_PATH = 'hindi_ocr_model.keras'
ENCODER_PATH = 'label_encoder.pkl'
FONT_PATH = 'NotoSansDevanagari-Regular.ttf'

# Register the custom font with matplotlib when the file is present and make
# it the default font family; otherwise keep matplotlib's default font.
if os.path.exists(FONT_PATH):
    fm.fontManager.addfont(FONT_PATH)
    plt.rcParams['font.family'] = 'Noto Sans Devanagari'
else:
    print("Custom font not found. Using default font.")
28
-
def load_model():
    """Load the OCR model stored at ``MODEL_PATH``.

    Returns:
        The object returned by ``tf.keras.models.load_model``.

    Raises:
        FileNotFoundError: If no file exists at ``MODEL_PATH``.
    """
    # Check up front so a missing asset produces a clear message naming the path.
    if not os.path.exists(MODEL_PATH):
        raise FileNotFoundError(f"Model file not found at {MODEL_PATH}")
    return tf.keras.models.load_model(MODEL_PATH)
34
-
def load_label_encoder():
    """Unpickle the label encoder stored at ``ENCODER_PATH``.

    Returns:
        The unpickled object.

    Raises:
        FileNotFoundError: If no file exists at ``ENCODER_PATH``.
    """
    if not os.path.exists(ENCODER_PATH):
        raise FileNotFoundError(f"Label encoder file not found at {ENCODER_PATH}")
    # NOTE(security): pickle.load executes code embedded in the file, so
    # ENCODER_PATH must only ever point at a trusted, locally shipped asset.
    with open(ENCODER_PATH, 'rb') as f:
        return pickle.load(f)
41
-
# Load once at import time so the same objects are reused across requests.
# Either call raises FileNotFoundError if its asset file is missing.
model = load_model()
label_encoder = load_label_encoder()
45
-
def detect_words(image, min_size=10):
    """Draw boxes around word-like regions of a grayscale image and count them.

    The image is binarised with an inverted Otsu threshold, dilated twice with
    a 3x3 kernel, and the external contours of the result are boxed.

    Args:
        image: Grayscale image array (the caller is expected to pass a
            single-channel image, as the thresholding step requires).
        min_size: A bounding box is kept only when both its width and its
            height are strictly greater than this value.

    Returns:
        A ``(word_img, word_count)`` tuple: a BGR copy of ``image`` with a
        green rectangle around every kept box, and the number of kept boxes.
    """
    _, binary = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    kernel = np.ones((3, 3), np.uint8)
    dilated = cv2.dilate(binary, kernel, iterations=2)
    # findContours returns (contours, hierarchy) on OpenCV 4.x but
    # (image, contours, hierarchy) on 3.x; index -2 is the contours in both.
    contours = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    word_img = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
    word_count = 0
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        if w > min_size and h > min_size:
            cv2.rectangle(word_img, (x, y), (x + w, y + h), (0, 255, 0), 2)
            word_count += 1
    return word_img, word_count
62
-
def run_sakshi_ocr(image_path):
    """Run ``sakshi_ocr.generate`` on a file and return what it printed.

    Args:
        image_path: Path of the image file passed to ``sakshi_ocr.generate``.

    Returns:
        Everything written to ``sys.stdout`` during the call, as one string.
    """
    from contextlib import redirect_stdout

    buffer = io.StringIO()
    # redirect_stdout swaps sys.stdout for the whole process while the block
    # runs and restores the previous stream afterwards, even on error.
    with redirect_stdout(buffer):
        sakshi_ocr.generate(image_path)
    return buffer.getvalue()
73
-
def image_to_base64(image, ext=".png"):
    """Encode an image array as a base64 string.

    Args:
        image: Image as a numpy array, in a form ``cv2.imencode`` accepts.
        ext: File extension selecting the encoding format (default ``".png"``).

    Returns:
        The base64 text of the encoded image, or ``None`` when encoding fails.
    """
    success, encoded_image = cv2.imencode(ext, image)
    if not success:
        return None
    return base64.b64encode(encoded_image.tobytes()).decode('utf-8')
80
-
# FastAPI application object; the route handlers below register on it.
app = FastAPI(title="Hindi OCR App by sakshi")
83
-
@app.get("/", response_class=HTMLResponse)
async def root():
    """Serve a minimal HTML page whose upload form posts a file to /predict."""
    html_content = """
    <html>
        <head>
            <title>Hindi OCR App by sakshi</title>
        </head>
        <body>
            <h1>Hindi OCR App by sakshi</h1>
            <form action="/predict" enctype="multipart/form-data" method="post">
                <input name="file" type="file" accept="image/*">
                <input type="submit" value="Upload and Predict">
            </form>
        </body>
    </html>
    """
    return HTMLResponse(content=html_content)
101
-
def _render_prediction_image(img, pred_label):
    """Plot ``img`` titled with the predicted label; return it as base64 PNG."""
    fig, ax = plt.subplots()
    try:
        ax.imshow(img, cmap='gray')
        ax.set_title(f"Predicted: {pred_label}", fontsize=12)
        ax.axis('off')
        buf = io.BytesIO()
        # Save this specific figure rather than whatever pyplot considers current.
        fig.savefig(buf, format="png")
        png_bytes = np.frombuffer(buf.getvalue(), np.uint8)
    finally:
        # Always release the figure, even when plotting or saving fails.
        plt.close(fig)
    prediction_img = cv2.imdecode(png_bytes, cv2.IMREAD_COLOR)
    return image_to_base64(prediction_img)


def _run_sakshi_on_image(img):
    """Write ``img`` to a temporary PNG, run Sakshi OCR on it, return its output."""
    tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
    tmp_file_path = tmp_file.name
    # Close our handle first so the file can be reopened by name on every platform.
    tmp_file.close()
    try:
        cv2.imwrite(tmp_file_path, img)
        return run_sakshi_ocr(tmp_file_path)
    finally:
        # Remove the temporary file whether or not OCR succeeded.
        os.remove(tmp_file_path)


@app.post("/predict")
async def predict(file: UploadFile = File(...)):
    """Run word detection and both OCR engines on an uploaded image.

    Args:
        file: The uploaded image file.

    Returns:
        A JSONResponse with keys ``word_count``, ``ocr_prediction``,
        ``sakshi_ocr_output``, ``original_image``, ``word_detected_image`` and
        ``prediction_image`` (the three images are base64 strings).

    Raises:
        HTTPException: 400 when the upload is empty or cannot be decoded as an
            image; 500 when the model prediction or its rendering fails.
    """
    # Read and decode the uploaded image
    contents = await file.read()
    if not contents:
        # An empty buffer cannot be decoded; reject it as a bad request up front.
        raise HTTPException(status_code=400, detail="Error reading the image.")
    nparr = np.frombuffer(contents, np.uint8)
    img = cv2.imdecode(nparr, cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise HTTPException(status_code=400, detail="Error reading the image.")

    # Encode the original image to base64 for visualization
    original_image = image_to_base64(cv2.cvtColor(img, cv2.COLOR_GRAY2BGR))

    # Word detection
    word_img, word_count = detect_words(img)
    word_img_encoded = image_to_base64(word_img)

    # OCR model prediction for single word
    try:
        img_resized = cv2.resize(img, (128, 32))
        img_norm = img_resized / 255.0
        img_input = img_norm[np.newaxis, ..., np.newaxis]  # shape: (1, 32, 128, 1)
        pred = model.predict(img_input)
        pred_label_idx = np.argmax(pred)
        pred_label = label_encoder.inverse_transform([pred_label_idx])[0]
        # Convert numpy scalars to plain Python values so the label is
        # guaranteed to be JSON-serialisable in the response below.
        if isinstance(pred_label, np.generic):
            pred_label = pred_label.item()

        # Generate an image with the prediction using matplotlib
        prediction_img_encoded = _render_prediction_image(img, pred_label)
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Error in OCR model processing: {e}"
        ) from e

    # Run Sakshi OCR on the image by saving temporarily. This step is
    # best-effort: a failure is reported in the response, not raised.
    try:
        sakshi_output = _run_sakshi_on_image(img)
    except Exception as e:
        sakshi_output = f"Error running Sakshi OCR: {e}"

    # Prepare the response
    response_data = {
        "word_count": word_count,
        "ocr_prediction": pred_label,
        "sakshi_ocr_output": sakshi_output,
        "original_image": original_image,
        "word_detected_image": word_img_encoded,
        "prediction_image": prediction_img_encoded,
    }

    return JSONResponse(content=response_data)
163
-
if __name__ == "__main__":
    # Imported here so uvicorn is only required when run as a script.
    import uvicorn

    # Listen on all network interfaces, port 8000.
    uvicorn.run(app, host="0.0.0.0", port=8000)