college / extract_text.py
battulabhaskar543
updated code files for deployment
c92680a
raw
history blame contribute delete
574 Bytes
#!/usr/bin/env python3
import os
import sys
sys.path.append('/home/bhaskar/cd/campusguide')
from ingestion.document_loader import DocumentLoader
def main(
    pdf_path: str = '/home/bhaskar/cd/campusguide/data/raw/ICFAI_Internship_Guidelines-1.pdf',
) -> None:
    """Load a PDF through ``DocumentLoader`` and print its extracted text.

    The text is written to stdout under a ``Full extracted text:`` heading,
    framed above and below by a 50-character ``=`` rule.

    Args:
        pdf_path: Path of the PDF to load. Defaults to the path this script
            was originally hard-coded to, so a bare ``main()`` call keeps
            working unchanged.

    Returns:
        None. If ``pdf_path`` does not exist, ``PDF not found`` is printed
        and nothing is loaded.
    """
    # Check the path first so a missing file is reported without having to
    # construct a loader at all.
    if not os.path.exists(pdf_path):
        print("PDF not found")
        return

    loader = DocumentLoader()
    doc = loader.load_document(pdf_path)
    # The result is indexed by the 'text' key; presumably a dict-like
    # record -- confirm against ingestion.document_loader.
    full_text = doc['text']

    rule = "=" * 50
    print("Full extracted text:")
    print(rule)
    print(full_text)
    print(rule)
# Run the extraction only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()