Update app.py
Browse files
app.py
CHANGED
|
@@ -9,6 +9,14 @@ from sklearn.feature_extraction.text import TfidfVectorizer
|
|
| 9 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 10 |
from io import StringIO
|
| 11 |
from PIL import Image
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
# Function to extract text from a PDF file
|
| 14 |
def extract_text_from_pdf(pdf_file):
|
|
|
|
| 9 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 10 |
from io import StringIO
|
| 11 |
from PIL import Image
|
| 12 |
+
import base64
|
| 13 |
+
from io import BytesIO
|
| 14 |
+
|
| 15 |
+
# Function to convert image to base64
|
| 16 |
+
def image_to_base64(image):
    """Return *image* serialized as PNG and encoded as a base64 string.

    Args:
        image: Object exposing ``save(fp, format=...)``. Presumably a
            ``PIL.Image.Image`` given the file's ``from PIL import Image``
            import -- confirm against callers.

    Returns:
        str: Base64 text of the bytes that ``image.save`` wrote to the
        in-memory buffer, decoded as UTF-8.
    """
    # Serialize into an in-memory buffer; the context manager releases the
    # buffer as soon as the encoded bytes have been copied out.
    with BytesIO() as buffered:
        image.save(buffered, format="PNG")
        png_bytes = buffered.getvalue()
    return base64.b64encode(png_bytes).decode("utf-8")
|
| 20 |
|
| 21 |
# Function to extract text from a PDF file
|
| 22 |
def extract_text_from_pdf(pdf_file):
|