Raghav001 committed on
Commit
7b83bab
·
1 Parent(s): c805624
Files changed (1) hide show
  1. app.py +8 -2
app.py CHANGED
@@ -5,7 +5,12 @@ import gradio as gr
5
  import pdfplumber
6
  import pandas as pd
7
  import time
8
- from cnocr import CnOcr
 
 
 
 
 
9
  from sentence_transformers import SentenceTransformer, models, util
10
  word_embedding_model = models.Transformer('sentence-transformers/all-MiniLM-L6-v2', do_lower_case=True)
11
  pooling_model = models.Pooling(word_embedding_model.get_word_embedding_dimension(), pooling_mode='cls')
@@ -118,7 +123,8 @@ def up_file(files):
118
  with open(file_name, mode='wb') as f:
119
  f.write(img['stream'].get_data())
120
  try:
121
- res = ocr.ocr(file_name)
 
122
  except Exception as e:
123
  res = []
124
  if len(res) > 0:
 
5
  import pdfplumber
6
  import pandas as pd
7
  import time
8
+ # from cnocr import CnOcr
9
+
10
+ from langchain.document_loaders import PyPDFLoader
11
+
12
+ pages = loader.load_and_split()
13
+
14
  from sentence_transformers import SentenceTransformer, models, util
15
  word_embedding_model = models.Transformer('sentence-transformers/all-MiniLM-L6-v2', do_lower_case=True)
16
  pooling_model = models.Pooling(word_embedding_model.get_word_embedding_dimension(), pooling_mode='cls')
 
123
  with open(file_name, mode='wb') as f:
124
  f.write(img['stream'].get_data())
125
  try:
126
+ # res = ocr.ocr(file_name)
127
+ res = PyPDFLoader(file_name)
128
  except Exception as e:
129
  res = []
130
  if len(res) > 0: