File size: 1,869 Bytes
4b0118c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# import ocrmypdf
# from src.Bot.logger import logging


# class OCR:
#     def __init__(self,input, output):
#         self.input = input
#         self.output = output

#     def do_ocr(self):
#         ocrmypdf.ocr(self.input, output_file=self.output)
#         return self.output



import ocrmypdf
from src.Bot.logger import logging
import os

class OCR:
    """Run OCR on a document with ``ocrmypdf`` and tidy up FAISS indexes.

    ``input`` is the source path handed to ``ocrmypdf.ocr`` and ``output`` is
    the path the OCR result is written to. The two FAISS helpers do not use
    any instance state; they are static methods grouped here for convenience.
    """

    def __init__(self, input, output=None):
        """Store the source and destination paths.

        Args:
            input: Source file, passed unchanged to ``ocrmypdf.ocr``. The name
                shadows the builtin but is kept so keyword callers still work.
            output: Destination path. When ``None``, the default
                ``<cwd>/output/output.pdf`` is used and the ``output``
                directory is created if it does not exist yet.
        """
        self.input = input
        if output is None:
            default_output_dir = os.path.join(os.getcwd(), "output")
            os.makedirs(default_output_dir, exist_ok=True)
            output = os.path.join(default_output_dir, "output.pdf")
        self.output = output

    def do_ocr(self):
        """OCR ``self.input`` into ``self.output`` and return the output path.

        ``force_ocr=True`` is passed through to ``ocrmypdf.ocr``; see the
        OCRmyPDF documentation for its exact effect on pages that already
        contain text.

        Returns:
            The output path stored on the instance.
        """
        ocrmypdf.ocr(self.input, output_file=self.output, force_ocr=True)
        return self.output

    @staticmethod
    def _is_faiss_index(index):
        """Return True when *index* is a ``faiss.Index`` instance."""
        # Imported lazily: the module never imports faiss at file level, and
        # the OCR part of this class must keep working without it installed.
        try:
            import faiss
        except ImportError:
            # Without faiss installed nothing can be a faiss.Index.
            return False
        return isinstance(index, faiss.Index)

    @staticmethod
    def reset_faiss_index(vector_store):
        """Clear all vectors from the FAISS index held by *vector_store*.

        Args:
            vector_store: Object expected to expose the FAISS index as its
                ``index`` attribute. A missing attribute or a non-FAISS value
                is reported rather than raised.
        """
        index = getattr(vector_store, "index", None)
        if OCR._is_faiss_index(index):
            index.reset()
            print("FAISS index has been reset (vectors cleared).")
        else:
            print("No FAISS index found.")

    @staticmethod
    def delete_faiss_index(vector_store):
        """Drop the FAISS index held by *vector_store* and collect garbage.

        Args:
            vector_store: Object expected to expose the FAISS index as its
                ``index`` attribute. After a successful call the attribute is
                ``None``.
        """
        import gc  # local import: gc is not imported at file level

        index = getattr(vector_store, "index", None)
        if OCR._is_faiss_index(index):
            # Rebinding to None drops the store's reference and leaves the
            # attribute in place, so later access sees None, not AttributeError.
            vector_store.index = None
            del index  # drop the local reference before collecting
            gc.collect()
            print("FAISS index deleted and memory cleared.")
        else:
            print("No FAISS index found.")