# Uploaded by sachinsen1295
# Commit message: Upload 13 files
# Commit: 4b0118c (verified)
# import ocrmypdf
# from src.Bot.logger import logging
# class OCR:
# def __init__(self,input, output):
# self.input = input
# self.output = output
# def do_ocr(self):
# ocrmypdf.ocr(self.input, output_file=self.output)
# return self.output
import ocrmypdf
from src.Bot.logger import logging
import os
class OCR:
    """Thin wrapper that runs ``ocrmypdf.ocr`` on one input and remembers the output.

    Attributes:
        input: The source handed to ``ocrmypdf.ocr`` as its first argument.
        output: The destination handed to ``ocrmypdf.ocr`` as ``output_file``.
    """

    def __init__(self, input, output=None):
        """Store the source and resolve the destination.

        Args:
            input: Source document to be processed.
            output: Destination for the processed document. When ``None``,
                ``<current working directory>/output/output.pdf`` is used and
                the ``output`` directory is created if it is missing.
        """
        self.input = input

        # Caller supplied a destination: use it verbatim, touch nothing on disk.
        if output is not None:
            self.output = output
            return

        # No destination given: fall back to a fixed file inside ./output,
        # making sure that directory exists before anything is written to it.
        fallback_dir = os.path.join(os.getcwd(), "output")
        os.makedirs(fallback_dir, exist_ok=True)
        self.output = os.path.join(fallback_dir, "output.pdf")

    def do_ocr(self):
        """Run ``ocrmypdf.ocr`` with ``force_ocr=True`` and return the destination.

        The value returned by ``ocrmypdf.ocr`` itself is discarded.

        Returns:
            The same destination that was stored in ``self.output``.
        """
        destination = self.output
        ocrmypdf.ocr(self.input, output_file=destination, force_ocr=True)
        return destination
# Function to reset the FAISS index (clear vectors)
def reset_faiss_index(vector_store):
    """Clear all vectors from the FAISS index held by ``vector_store``.

    Args:
        vector_store: Object exposing an ``index`` attribute. The index is
            reset only when it is an instance of ``faiss.Index``.

    Returns:
        None. The outcome is reported on standard output.
    """
    # The module never imports ``faiss`` at top level, so resolve it here;
    # otherwise the isinstance check below raises NameError on every call.
    try:
        import faiss
    except ImportError:
        # Without the faiss package nothing can be a faiss.Index.
        faiss = None

    index = vector_store.index
    if faiss is not None and isinstance(index, faiss.Index):
        index.reset()
        print("FAISS index has been reset (vectors cleared).")
    else:
        print("No FAISS index found.")
# Function to delete the FAISS index (remove from memory)
def delete_faiss_index(vector_store):
    """Drop the FAISS index held by ``vector_store`` and run a garbage collection.

    Args:
        vector_store: Object exposing an ``index`` attribute. The index is
            dropped only when it is an instance of ``faiss.Index``; afterwards
            ``vector_store.index`` is ``None``.

    Returns:
        None. The outcome is reported on standard output.
    """
    # Neither ``gc`` nor ``faiss`` is imported at module level, so bring them
    # into scope here; otherwise every call fails with NameError.
    import gc

    try:
        import faiss
    except ImportError:
        # Without the faiss package nothing can be a faiss.Index.
        faiss = None

    # Deliberately no local alias for the index: a local reference would keep
    # the object alive until this function returns.
    if faiss is not None and isinstance(vector_store.index, faiss.Index):
        # Rebinding to None releases the store's reference and leaves the
        # attribute in place, so later ``vector_store.index`` reads still work.
        vector_store.index = None
        gc.collect()  # Also reclaim anything that was only reachable via cycles
        print("FAISS index deleted and memory cleared.")
    else:
        print("No FAISS index found.")