Spaces:
Sleeping
Sleeping
Commit
·
31ef0bb
1
Parent(s):
041f935
initial commit
Browse files- EngaigeQuery.py +61 -0
- Engaigemodelling.py +162 -0
- Procfile +1 -0
- app.py +246 -0
- metadata1.json +0 -0
- requirements.txt +9 -0
- static/css/styles.css +209 -0
- static/js/scripts.js +0 -0
- templates/index.html +40 -0
- uploads/employee_handbook_print_1.pdf +0 -0
- vercel.json +9 -0
EngaigeQuery.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from sentence_transformers import SentenceTransformer
|
| 2 |
+
import faiss
|
| 3 |
+
import json
|
| 4 |
+
import numpy as np
|
| 5 |
+
|
| 6 |
+
# Initialize the sentence transformer model
|
| 7 |
+
model = SentenceTransformer('all-MiniLM-L6-v2')
|
| 8 |
+
|
| 9 |
+
index_path = 'vector_indexNLP.faiss'
|
| 10 |
+
metadata_path = 'metadataNLP.json'
|
| 11 |
+
|
| 12 |
+
# Load FAISS index and metadata
|
| 13 |
+
index = faiss.read_index(index_path)
|
| 14 |
+
with open(metadata_path, 'r') as f:
|
| 15 |
+
metadata = json.load(f)
|
| 16 |
+
|
| 17 |
+
def convert_distance_to_similarity(distance):
    """Turn a distance into a relevancy score on a 0-100 scale.

    A distance of 0 gives 100 and the score shrinks as the distance grows.
    The formula assumes ``distance`` is non-negative (a distance of -1 would
    divide by zero).
    """
    inverse = 1 / (1 + distance)
    return inverse * 100
|
| 20 |
+
|
| 21 |
+
def query_index(query, model, index, metadata, top_k=5):
    """Return the stored paragraphs closest to ``query`` with a relevancy score.

    Args:
        query: Text to search for.
        model: Encoder exposing ``encode(text)``; its output is reshaped to a
            single float32 row before the search.
        index: Search index exposing ``search(vectors, k)`` that returns a
            ``(distances, ids)`` pair with one row per query vector.
        metadata: Sequence of per-paragraph dicts addressed by the ids that
            ``index`` returns.
        top_k: Maximum number of hits to return.

    Returns:
        A list of dicts with the keys ``filename``, ``page_num``,
        ``standardized_text``, ``question_text``, ``answerable_text`` and
        ``score``, in the order the index returned them. The list is shorter
        than ``top_k`` when the index holds fewer entries.
    """
    query_embedding = model.encode(query).reshape(1, -1).astype('float32')
    distances, ids = index.search(query_embedding, top_k)

    results = []
    for distance, doc_id in zip(distances[0], ids[0]):
        # FAISS fills unused result slots with id -1 when it finds fewer than
        # top_k neighbours; metadata[-1] would silently return the last
        # paragraph, so those slots are skipped.
        if doc_id < 0:
            continue
        doc_metadata = metadata[doc_id]
        results.append({
            "filename": doc_metadata["filename"],
            "page_num": doc_metadata["page_num"],
            "standardized_text": doc_metadata["standardized_text"],
            "question_text": doc_metadata["question_text"],
            "answerable_text": doc_metadata["answerable_text"],
            "score": convert_distance_to_similarity(distance),
        })

    return results
|
| 40 |
+
|
| 41 |
+
query = "what is Rule-Based Machine Translation?"
|
| 42 |
+
results = query_index(query, model, index, metadata)
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def create_answer_to_show(query, results):
    """Format search hits as a plain-text report.

    Each hit contributes a separator line, its filename, page number, the
    first 100 characters of ``question_text`` and its score. The first 500
    characters of ``answerable_text`` are included only when it is non-empty.
    """
    pieces = [f"Based on your query '{query}', the following relevant information was found:\n\n"]
    for hit in results:
        pieces.append("\n------------------------------------------------------------------------------------------------------------------\n")
        pieces.append(f"Filename: {hit['filename']}\n")
        pieces.append(f"Page number: {hit['page_num']}\n")
        pieces.append(f"Related keywords: {hit['question_text'][:100]}...\n")
        if hit['answerable_text'] != "":
            pieces.append(f"Answer: {hit['answerable_text'][:500]}\n")
        pieces.append(f"Relevancy Score: {hit['score']}\n")
    pieces.append("\nFor more detailed information, please refer to the respective original texts.\n\n\n")
    return "".join(pieces)
|
| 58 |
+
|
| 59 |
+
answer = create_answer_to_show(query, results)
|
| 60 |
+
|
| 61 |
+
print(answer)
|
Engaigemodelling.py
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import fitz # PyMuPDF
|
| 3 |
+
import tensorflow as tf
|
| 4 |
+
from sentence_transformers import SentenceTransformer
|
| 5 |
+
import numpy as np
|
| 6 |
+
import faiss
|
| 7 |
+
import json
|
| 8 |
+
import re
|
| 9 |
+
|
| 10 |
+
# This folder should contain all the pdf files which we need to work on . Below given is just an example
|
| 11 |
+
pdf_folder = '/Users/shivangsinha/Downloads/personalProject'
|
| 12 |
+
pdf_text_data = {}
|
| 13 |
+
embeddings = []
|
| 14 |
+
metadata = []
|
| 15 |
+
|
| 16 |
+
# Initialize the sentence transformer model
|
| 17 |
+
model = SentenceTransformer('all-MiniLM-L6-v2')
|
| 18 |
+
#model = SentenceTransformer('paraphrase-MiniLM-L6-v2') - Also tried with other model but seems the current one is working better.
|
| 19 |
+
|
| 20 |
+
# converting tensor to string so that to store it in json format.
|
| 21 |
+
def tensor_to_string(tensor):
    """Decode the bytes held by ``tensor`` into a Python ``str`` (UTF-8).

    ``tensor.numpy()`` is expected to return a bytes object, which is what a
    scalar string tensor yields.
    """
    raw_bytes = tensor.numpy()
    return raw_bytes.decode("utf-8")
|
| 23 |
+
|
| 24 |
+
# extract text based on page number so that it is more relevant for search.
|
| 25 |
+
def extract_text_from_pdf_with_page_numbers(pdf_path):
    """Extract the text of every page of a PDF.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        A list of ``(page_number, text)`` tuples in document order, where
        ``page_number`` starts at 1.
    """
    # The context manager closes the document (and its file handle) even when
    # text extraction raises.
    with fitz.open(pdf_path) as doc:
        # Pages are loaded by 0-based index; 1 is added so the stored number
        # matches the page number a reader sees.
        return [
            (page_index + 1, doc.load_page(page_index).get_text())
            for page_index in range(len(doc))
        ]
|
| 35 |
+
|
| 36 |
+
# Make sure the input data does not come from the table of contents, and strip out all text that is irrelevant for the search.
|
| 37 |
+
def custom_standardization(input_data):
    """Clean one paragraph tensor and return the result as a plain string.

    Paragraphs containing a run of three or more dots are treated as
    table-of-contents entries and reduced to an empty string. Everything else
    goes through the substitutions below, applied in order.
    """
    raw_text = input_data.numpy().decode('utf-8')
    if re.search(r'\.{3,}', raw_text) is not None:
        return ""

    # (pattern, replacement) pairs; order matters because later patterns
    # operate on the output of earlier ones.
    substitutions = (
        (r"https?://\S+|www\.\S+", ""),  # URLs
        (r"\S+@\S+", ""),                # e-mail addresses
        (r"<.*?>", ""),                  # text in angle brackets (usually HTML tags)
        (r"\[|\]", ""),                  # square brackets, keeping the text inside
        (r"\w*\d\w*", ""),               # word-character runs that contain a digit
        (r"[^a-zA-Z\s]", ""),            # anything that is not an ASCII letter or whitespace
        (r"\s+", " "),                   # collapse whitespace runs to one space
    )
    cleaned = input_data
    for pattern, replacement in substitutions:
        cleaned = tf.strings.regex_replace(cleaned, pattern, replacement)

    return cleaned.numpy().decode('utf-8')
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
# For the time being I rely on the numbered question-and-answer layout: the text is split into paragraphs at every line that starts with a number followed by a dot (e.g. "12.").
|
| 69 |
+
def split_into_paragraphs(text):
    """Split page text into chunks that each start at a numbered item.

    A new chunk begins at every position that directly follows a newline and
    is followed by digits and a dot (for example ``"12."``). Chunks are
    stripped of surrounding whitespace and empty ones are dropped.
    """
    chunks = []
    for piece in re.split(r'(?<=\n)(?=\d+\.)', text):
        trimmed = piece.strip()
        if trimmed:
            chunks.append(trimmed)
    return chunks
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
# This part is for storing the vector of a paragraph in a required format
|
| 82 |
+
def text_to_vectors(paragraphs):
    """Encode ``paragraphs`` with the module-level ``model`` and return the result."""
    return model.encode(paragraphs)
|
| 85 |
+
|
| 86 |
+
# This split is used to Answer the query or simply show the relevant text from the book.
|
| 87 |
+
def split_into_qa(text):
    """Split a paragraph into a ``(question, answer)`` pair.

    The split point is the first newline after the last question mark that
    still has a newline somewhere behind it: everything up to that newline is
    the question, the rest is the answer. Without such a newline the whole
    stripped text is returned as the question with an empty answer.

    After a successful split, a part containing three or more consecutive
    dots (a table-of-contents style leader) is replaced by an empty string.
    """
    found = re.search(r'(.*\?.*?)\n', text, re.DOTALL)
    if not found:
        # No "question line" could be isolated: the whole text is the question.
        return text.strip(), ""

    toc_leader = re.compile(r'\.{3,}')
    question = found.group(1).strip()
    answer = text[found.end():].strip()

    # Blank out parts that look like index / table-of-contents entries.
    if toc_leader.search(question):
        question = ""
    if toc_leader.search(answer):
        answer = ""

    return question, answer
|
| 108 |
+
|
| 109 |
+
# storing vector to use it later while querying
|
| 110 |
+
def store_vectors(paragraphs, vectors, metadata, filename, page_num):
    """Record metadata and an embedding for every paragraph of one page.

    For each paragraph one dict is appended to ``metadata`` and one embedding
    is appended to the module-level ``embeddings`` list, so entry *n* of one
    list belongs to entry *n* of the other.

    Args:
        paragraphs: Paragraph strings of a single page.
        vectors: Pre-computed vectors for ``paragraphs``. Only its length is
            used (iteration stops at the shorter of the two sequences); the
            stored embedding is recomputed from the standardized text.
        metadata: List that receives one dict per paragraph.
        filename: Name of the source PDF, stored with each entry.
        page_num: Page number of the page, stored with each entry.
    """
    for i, (paragraph, _precomputed) in enumerate(zip(paragraphs, vectors)):
        question, answer = split_into_qa(paragraph)
        standardized_text = custom_standardization(tf.constant(paragraph))
        # Embed the cleaned text rather than the raw paragraph.
        vector = model.encode(standardized_text).tolist()
        metadata.append({
            "index": f'paragraph-{i}',
            "filename": filename,
            "page_num": page_num,
            "standardized_text": standardized_text,
            "question_text": question,
            "answerable_text": answer,
        })
        embeddings.append(vector)
|
| 126 |
+
|
| 127 |
+
for filename in os.listdir(pdf_folder):
|
| 128 |
+
if filename.endswith('.pdf'):
|
| 129 |
+
pdf_path = os.path.join(pdf_folder, filename)
|
| 130 |
+
text_pages = extract_text_from_pdf_with_page_numbers(pdf_path)
|
| 131 |
+
for page_num, text in text_pages:
|
| 132 |
+
paragraphs = split_into_paragraphs(text)
|
| 133 |
+
vectors = text_to_vectors(paragraphs)
|
| 134 |
+
store_vectors(paragraphs, vectors, metadata, filename, page_num)
|
| 135 |
+
pdf_text_data[filename] = text_pages
|
| 136 |
+
|
| 137 |
+
# Save FAISS index and metadata to JSON
|
| 138 |
+
index_path = 'vector_indexNLP.faiss'
|
| 139 |
+
metadata_path = 'metadataNLP.json'
|
| 140 |
+
|
| 141 |
+
# Convert embeddings to numpy array for FAISS
|
| 142 |
+
embeddings_array = np.array(embeddings, dtype='float32')
|
| 143 |
+
|
| 144 |
+
# Initialize FAISS index
|
| 145 |
+
dimension = embeddings_array.shape[1] # Dimension of the embeddings
|
| 146 |
+
index = faiss.IndexFlatL2(dimension)
|
| 147 |
+
|
| 148 |
+
# Add embeddings in batches to avoid memory issues. I faced some issue while adding index
|
| 149 |
+
batch_size = 1000 # Adjust batch size based on available memory
|
| 150 |
+
for i in range(0, len(embeddings), batch_size):
|
| 151 |
+
batch_embeddings = embeddings_array[i:i+batch_size]
|
| 152 |
+
index.add(batch_embeddings)
|
| 153 |
+
|
| 154 |
+
# Save the FAISS index
|
| 155 |
+
faiss.write_index(index, index_path)
|
| 156 |
+
|
| 157 |
+
# Save metadata
|
| 158 |
+
with open(metadata_path, 'w') as f:
|
| 159 |
+
json.dump(metadata, f)
|
| 160 |
+
|
| 161 |
+
print(f"FAISS index saved to: {index_path}")
|
| 162 |
+
print(f"Metadata saved to: {metadata_path}")
|
Procfile
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
web: gunicorn app:app
|
app.py
ADDED
|
@@ -0,0 +1,246 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from flask import Flask, request, jsonify,render_template
|
| 2 |
+
from flask_cors import CORS
|
| 3 |
+
import requests
|
| 4 |
+
from sentence_transformers import SentenceTransformer
|
| 5 |
+
import faiss
|
| 6 |
+
import json
|
| 7 |
+
import numpy as np
|
| 8 |
+
import os
|
| 9 |
+
from flask import Flask, request, jsonify
|
| 10 |
+
from flask_cors import CORS
|
| 11 |
+
from werkzeug.utils import secure_filename
|
| 12 |
+
import fitz # PyMuPDF
|
| 13 |
+
import tensorflow as tf
|
| 14 |
+
from sentence_transformers import SentenceTransformer
|
| 15 |
+
import numpy as np
|
| 16 |
+
import faiss
|
| 17 |
+
import json
|
| 18 |
+
import re
|
| 19 |
+
import shutil
|
| 20 |
+
|
| 21 |
+
app = Flask(__name__)
|
| 22 |
+
CORS(app) # Enable CORS for all routes
|
| 23 |
+
|
| 24 |
+
@app.route('/')
def index():
    """Render the ``index.html`` template for the root URL."""
    landing_page = render_template('index.html')
    return landing_page
|
| 27 |
+
|
| 28 |
+
UPLOAD_FOLDER = 'uploads'
|
| 29 |
+
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
|
| 30 |
+
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
|
| 31 |
+
|
| 32 |
+
model = SentenceTransformer('all-MiniLM-L6-v2')
|
| 33 |
+
|
| 34 |
+
index_path = 'vector_index1.faiss'
|
| 35 |
+
metadata_path = 'metadata1.json'
|
| 36 |
+
|
| 37 |
+
embeddings = []
|
| 38 |
+
metadata = []
|
| 39 |
+
|
| 40 |
+
def tensor_to_string(tensor):
    """Decode the bytes held by ``tensor`` into a Python ``str`` (UTF-8).

    ``tensor.numpy()`` is expected to return a bytes object, which is what a
    scalar string tensor yields.
    """
    raw_bytes = tensor.numpy()
    return raw_bytes.decode("utf-8")
|
| 42 |
+
|
| 43 |
+
def extract_text_from_pdf_with_page_numbers(pdf_path):
    """Extract the text of every page of a PDF.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        A list of ``(page_number, text)`` tuples in document order, where
        ``page_number`` starts at 1.
    """
    # Close the document (and its file handle) as soon as the text has been
    # read: the uploads folder holding the file is deleted on the next upload.
    with fitz.open(pdf_path) as doc:
        # Pages are loaded by 0-based index; 1 is added so the stored number
        # matches the page number a reader sees.
        return [
            (page_index + 1, doc.load_page(page_index).get_text())
            for page_index in range(len(doc))
        ]
|
| 51 |
+
|
| 52 |
+
def custom_standardization(input_data):
    """Clean one paragraph tensor and return the result as a plain string.

    Paragraphs containing a run of three or more dots are treated as
    table-of-contents entries and reduced to an empty string. Everything else
    goes through the substitutions below, applied in order.
    """
    raw_text = input_data.numpy().decode('utf-8')
    if re.search(r'\.{3,}', raw_text) is not None:
        return ""

    # (pattern, replacement) pairs; order matters because later patterns
    # operate on the output of earlier ones.
    substitutions = (
        (r"https?://\S+|www\.\S+", ""),  # URLs
        (r"\S+@\S+", ""),                # e-mail addresses
        (r"<.*?>", ""),                  # text in angle brackets
        (r"\[|\]", ""),                  # square brackets, keeping the text inside
        (r"\w*\d\w*", ""),               # word-character runs that contain a digit
        (r"[^a-zA-Z\s]", ""),            # anything that is not an ASCII letter or whitespace
        (r"\s+", " "),                   # collapse whitespace runs to one space
    )
    cleaned = input_data
    for pattern, replacement in substitutions:
        cleaned = tf.strings.regex_replace(cleaned, pattern, replacement)

    return cleaned.numpy().decode('utf-8')
|
| 64 |
+
|
| 65 |
+
def split_into_paragraphs(text):
    """Split page text into stripped, non-empty chunks.

    A new chunk begins at every position that directly follows a newline and
    is followed either by a digit, or by a newline, optional whitespace and
    another newline. A single blank line (two consecutive newlines) therefore
    does not start a new chunk.
    """
    chunks = []
    for piece in re.split(r'(?<=\n)(?=\d+|(?=\n\s*\n))', text):
        trimmed = piece.strip()
        if trimmed:
            chunks.append(trimmed)
    return chunks
|
| 70 |
+
|
| 71 |
+
def text_to_vectors(paragraphs):
    """Encode ``paragraphs`` with the module-level ``model`` and return the result."""
    return model.encode(paragraphs)
|
| 74 |
+
|
| 75 |
+
def split_into_qa(text):
    """Split a paragraph into a ``(question, answer)`` pair.

    The split point is the first newline after the last question mark that
    still has a newline somewhere behind it: everything up to that newline is
    the question, the rest is the answer. Without such a newline the whole
    stripped text is returned as the question with an empty answer.

    After a successful split, a part containing three or more consecutive
    dots (a table-of-contents style leader) is replaced by an empty string.
    """
    found = re.search(r'(.*\?.*?)\n', text, re.DOTALL)
    if not found:
        # No "question line" could be isolated: the whole text is the question.
        return text.strip(), ""

    toc_leader = re.compile(r'\.{3,}')
    question = found.group(1).strip()
    answer = text[found.end():].strip()

    # Blank out parts that look like index / table-of-contents entries.
    if toc_leader.search(question):
        question = ""
    if toc_leader.search(answer):
        answer = ""

    return question, answer
|
| 96 |
+
|
| 97 |
+
def store_vectors(paragraphs, vectors, metadata, filename, page_num):
    """Record metadata and an embedding for every paragraph of one page.

    For each paragraph one dict is appended to ``metadata`` and one embedding
    is appended to the module-level ``embeddings`` list, so entry *n* of one
    list belongs to entry *n* of the other.

    Args:
        paragraphs: Paragraph strings of a single page.
        vectors: Pre-computed vectors for ``paragraphs``. Only its length is
            used (iteration stops at the shorter of the two sequences); the
            stored embedding is recomputed from the standardized text.
        metadata: List that receives one dict per paragraph.
        filename: Name of the source PDF, stored with each entry.
        page_num: Page number of the page, stored with each entry.
    """
    for i, (paragraph, _precomputed) in enumerate(zip(paragraphs, vectors)):
        question, answer = split_into_qa(paragraph)
        standardized_text = custom_standardization(tf.constant(paragraph))
        # Embed the cleaned text rather than the raw paragraph.
        vector = model.encode(standardized_text).tolist()
        metadata.append({
            "index": f'paragraph-{i}',
            "filename": filename,
            "page_num": page_num,
            "standardized_text": standardized_text,
            "question_text": question,
            "answerable_text": answer,
        })
        embeddings.append(vector)
|
| 113 |
+
|
| 114 |
+
@app.route('/upload', methods=['POST'])
def upload_pdf():
    """Replace the indexed document with an uploaded PDF.

    Expects a multipart upload in the ``file`` form field. The uploads folder
    is emptied, the new file is saved there, any previously written index and
    metadata files are removed, and the PDF is processed into a fresh index.

    Returns:
        A JSON success message, or a JSON error with status 400 when the file
        part is missing, no file was selected, or the file name is unusable.
    """
    if 'file' not in request.files:
        return jsonify({'error': 'No file part'}), 400
    uploaded = request.files['file']
    if uploaded.filename == '':
        return jsonify({'error': 'No selected file'}), 400

    filename = secure_filename(uploaded.filename)
    if not filename:
        # secure_filename() can reduce a name to an empty string; joining that
        # with the folder would point at the folder itself.
        return jsonify({'error': 'Invalid file name'}), 400

    # Start from an empty uploads folder so only the newest PDF is kept.
    upload_dir = app.config['UPLOAD_FOLDER']
    if os.path.exists(upload_dir):
        shutil.rmtree(upload_dir)
    os.makedirs(upload_dir)

    file_path = os.path.join(upload_dir, filename)
    uploaded.save(file_path)

    # Remove each stale artefact on its own so that one missing file does not
    # prevent the other from being deleted.
    for stale_path in ('metadata1.json', 'vector_index1.faiss'):
        try:
            os.remove(stale_path)
        except OSError as e:
            print(f"Error: {e.strerror}")

    # The module-level lists outlive a request; without clearing them the new
    # index would also contain the paragraphs of every earlier upload.
    embeddings.clear()
    metadata.clear()

    process_pdf(file_path, filename)
    print(file_path)
    return jsonify({'success': 'File uploaded and processed successfully'})
|
| 145 |
+
|
| 146 |
+
def process_pdf(file_path, filename):
    """Index every page of one PDF, then write the index and metadata to disk."""
    for page_num, text in extract_text_from_pdf_with_page_numbers(file_path):
        page_paragraphs = split_into_paragraphs(text)
        page_vectors = text_to_vectors(page_paragraphs)
        store_vectors(page_paragraphs, page_vectors, metadata, filename, page_num)
    save_index_and_metadata()
|
| 153 |
+
|
| 154 |
+
def save_index_and_metadata():
    """Build a flat L2 FAISS index from the collected embeddings and save it.

    The index is written to ``index_path`` and the module-level ``metadata``
    list is written as JSON to ``metadata_path``.

    Raises:
        ValueError: If no embeddings have been collected, because an index
            dimension cannot be derived from an empty list.
    """
    if not embeddings:
        raise ValueError("No embeddings to index: no paragraphs were extracted")

    embeddings_array = np.array(embeddings, dtype='float32')
    dimension = embeddings_array.shape[1]
    index = faiss.IndexFlatL2(dimension)

    # Add the vectors in slices to bound the size of each add() call.
    batch_size = 1000
    for start in range(0, len(embeddings), batch_size):
        index.add(embeddings_array[start:start + batch_size])

    faiss.write_index(index, index_path)
    with open(metadata_path, 'w') as f:
        json.dump(metadata, f)
|
| 165 |
+
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
# Load FAISS index and metadata
|
| 169 |
+
|
| 170 |
+
|
| 171 |
+
def convert_distance_to_similarity(distance):
    """Turn a distance into a relevancy score on a 0-100 scale.

    A distance of 0 gives 100 and the score shrinks as the distance grows.
    The formula assumes ``distance`` is non-negative (a distance of -1 would
    divide by zero).
    """
    inverse = 1 / (1 + distance)
    return inverse * 100
|
| 174 |
+
|
| 175 |
+
def query_index(query, model, index, metadata, top_k=5):
    """Return the stored paragraphs closest to ``query`` with a relevancy score.

    Args:
        query: Text to search for.
        model: Encoder exposing ``encode(text)``; its output is reshaped to a
            single float32 row before the search.
        index: Search index exposing ``search(vectors, k)`` that returns a
            ``(distances, ids)`` pair with one row per query vector.
        metadata: Sequence of per-paragraph dicts addressed by the ids that
            ``index`` returns.
        top_k: Maximum number of hits to return.

    Returns:
        A list of dicts with the keys ``filename``, ``page_num``,
        ``standardized_text``, ``question_text``, ``answerable_text`` and
        ``score``, in the order the index returned them. The list is shorter
        than ``top_k`` when the index holds fewer entries.
    """
    query_embedding = model.encode(query).reshape(1, -1).astype('float32')
    distances, ids = index.search(query_embedding, top_k)

    results = []
    for distance, doc_id in zip(distances[0], ids[0]):
        # FAISS fills unused result slots with id -1 when it finds fewer than
        # top_k neighbours; metadata[-1] would silently return the last
        # paragraph, so those slots are skipped.
        if doc_id < 0:
            continue
        doc_metadata = metadata[doc_id]
        results.append({
            "filename": doc_metadata["filename"],
            "page_num": doc_metadata["page_num"],
            "standardized_text": doc_metadata["standardized_text"],
            "question_text": doc_metadata["question_text"],
            "answerable_text": doc_metadata["answerable_text"],
            "score": convert_distance_to_similarity(distance),
        })

    return results
|
| 194 |
+
|
| 195 |
+
def fetch_answer_from_external_api(question, result, timeout=120):
    """Ask the external chat endpoint to phrase an answer from one search hit.

    Args:
        question: The user's question (a string).
        result: The search hit to answer from; it is embedded in the prompt
            via ``str(result)``.
        timeout: Seconds to wait for the remote service before giving up.

    Returns:
        The ``response`` field of the JSON reply, or ``''`` when that field is
        absent, the reply is not a JSON object, or the request fails.
    """
    data = {
        "messages": [
            {
                "content": "Question=" +question + ",answer to look from Uploaded pdf file and dont include the field name from the json file in answer section = " +str(result) + "answer=Based on your PDF provided , ",
                "role": "user"
            }
        ],
        "model": "mixtral:8x7b-instruct-v0.1-q6_K"
    }
    print("data="+str(data))
    try:
        # Without a timeout a stalled remote service would block this worker
        # indefinitely.
        response = requests.post(
            'https://inf.cl.uni-trier.de/chat/',
            json=data,
            headers={'accept': 'application/json', 'Content-Type': 'application/json'},
            timeout=timeout,
        )
        response.raise_for_status()
        response_data = response.json()
    except (requests.RequestException, ValueError) as exc:
        # Best effort: the caller still has the locally formatted hits, so a
        # failed remote call degrades to an empty generated answer.
        print(f"External answer API request failed: {exc}")
        return ''

    if not isinstance(response_data, dict):
        return ''
    return response_data.get('response', '')
|
| 211 |
+
|
| 212 |
+
def create_answer_to_show(query, results):
    """Format search hits as a plain-text report.

    Each hit contributes a separator line, its filename, page number, its
    full ``question_text`` and its score. The first 500 characters of
    ``answerable_text`` are included only when it is non-empty.
    """
    pieces = [f"Based on your query '{query}', the following relevant information was found:\n\n"]
    for hit in results:
        pieces.append("\n------------------------------------------------------------------------------------------------------------------\n")
        pieces.append(f"Filename: {hit['filename']}\n")
        pieces.append(f"Page number: {hit['page_num']}\n")
        pieces.append(f"Related keywords: {hit['question_text']}...\n")
        if hit['answerable_text'] != "":
            pieces.append(f"Answer: {hit['answerable_text'][:500]}\n")
        pieces.append(f"Relevancy Score: {hit['score']}\n")
    pieces.append("\nFor more detailed information, please refer to the respective original texts.\n\n\n")
    return "".join(pieces)
|
| 224 |
+
|
| 225 |
+
@app.route('/api/query', methods=['POST'])
def query_endpoint():
    """Answer a question from the currently indexed PDF.

    Expects a JSON object with a ``query`` string and an optional positive
    integer ``top_k`` (default 5). The saved index and metadata are loaded
    from disk on every request.

    Returns:
        JSON ``{'answer': ...}`` holding the externally generated answer
        followed by the formatted hits; a JSON error with status 400 for an
        invalid request body; or a JSON error with status 404 when no index
        has been saved yet.
    """
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    query = data.get('query', '')
    if not isinstance(query, str) or not query.strip():
        return jsonify({'error': 'A non-empty "query" string is required'}), 400

    top_k = data.get('top_k', 5)
    # bool is a subclass of int, so it is rejected explicitly.
    if isinstance(top_k, bool) or not isinstance(top_k, int) or top_k < 1:
        return jsonify({'error': '"top_k" must be a positive integer'}), 400

    # Both files are written by the upload step; without them there is
    # nothing to search.
    if not (os.path.exists(index_path) and os.path.exists(metadata_path)):
        return jsonify({'error': 'No document has been indexed yet'}), 404

    faiss_index = faiss.read_index(index_path)
    with open(metadata_path, 'r') as f:
        stored_metadata = json.load(f)

    results = query_index(query, model, faiss_index, stored_metadata, top_k)
    formatted_answer = create_answer_to_show(query, results)
    if not results:
        # No hit to hand to the external model; return the formatted report only.
        return jsonify({'answer': formatted_answer})

    answer2 = fetch_answer_from_external_api(query, results[0])
    print("=>"+answer2)

    return jsonify({'answer': answer2+"\n\n"+formatted_answer })
|
| 242 |
+
|
| 243 |
+
|
| 244 |
+
|
| 245 |
+
if __name__ == '__main__':
|
| 246 |
+
app.run(debug=True)
|
metadata1.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
requirements.txt
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
faiss_cpu==1.8.0
|
| 2 |
+
sentence_transformers==3.0.1
|
| 3 |
+
tensorflow==2.16.1
|
| 4 |
+
Flask==3.0.3
|
| 5 |
+
Flask-Cors==4.0.1
|
| 6 |
+
numpy
|
| 7 |
+
tf-keras
|
| 8 |
+
PyMuPDF==1.24.5
|
| 9 |
+
gunicorn
|
static/css/styles.css
ADDED
|
@@ -0,0 +1,209 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
@import url('https://fonts.googleapis.com/css?family=Exo:400,700');
|
| 2 |
+
|
| 3 |
+
* {
|
| 4 |
+
margin: 0px;
|
| 5 |
+
padding: 0px;
|
| 6 |
+
box-sizing: border-box;
|
| 7 |
+
}
|
| 8 |
+
|
| 9 |
+
body {
|
| 10 |
+
font-family: 'Exo', sans-serif;
|
| 11 |
+
}
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
.context {
|
| 15 |
+
width: 100%;
|
| 16 |
+
position: absolute;
|
| 17 |
+
top: 30vh;
|
| 18 |
+
|
| 19 |
+
}
|
| 20 |
+
|
| 21 |
+
.context h1 {
|
| 22 |
+
text-align: center;
|
| 23 |
+
color: #fefefe;
|
| 24 |
+
font-size: 150px;
|
| 25 |
+
}
|
| 26 |
+
|
| 27 |
+
.context h3 {
|
| 28 |
+
text-align: center;
|
| 29 |
+
color: #e4b714;
|
| 30 |
+
font-size: 30px;
|
| 31 |
+
}
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
.area {
|
| 35 |
+
background: #000428;
|
| 36 |
+
background: -webkit-linear-gradient(to right, #000428, #004e92);
|
| 37 |
+
background: linear-gradient(to right, #000428, #004e92);
|
| 38 |
+
width: 100%;
|
| 39 |
+
height: 100vh;
|
| 40 |
+
}
|
| 41 |
+
|
| 42 |
+
.circles {
|
| 43 |
+
position: absolute;
|
| 44 |
+
top: 0;
|
| 45 |
+
left: 0;
|
| 46 |
+
width: 100%;
|
| 47 |
+
height: 100%;
|
| 48 |
+
overflow: hidden;
|
| 49 |
+
}
|
| 50 |
+
|
| 51 |
+
.circles li {
|
| 52 |
+
position: absolute;
|
| 53 |
+
display: block;
|
| 54 |
+
list-style: none;
|
| 55 |
+
width: 20px;
|
| 56 |
+
height: 20px;
|
| 57 |
+
background: rgba(255, 255, 255, 0.2);
|
| 58 |
+
animation: animate 25s linear infinite;
|
| 59 |
+
bottom: -150px;
|
| 60 |
+
|
| 61 |
+
}
|
| 62 |
+
|
| 63 |
+
.circles li:nth-child(1) {
|
| 64 |
+
left: 25%;
|
| 65 |
+
width: 80px;
|
| 66 |
+
height: 80px;
|
| 67 |
+
animation-delay: 0s;
|
| 68 |
+
}
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
.circles li:nth-child(2) {
|
| 72 |
+
left: 10%;
|
| 73 |
+
width: 20px;
|
| 74 |
+
height: 20px;
|
| 75 |
+
animation-delay: 2s;
|
| 76 |
+
animation-duration: 12s;
|
| 77 |
+
}
|
| 78 |
+
|
| 79 |
+
.circles li:nth-child(3) {
|
| 80 |
+
left: 70%;
|
| 81 |
+
width: 20px;
|
| 82 |
+
height: 20px;
|
| 83 |
+
animation-delay: 4s;
|
| 84 |
+
}
|
| 85 |
+
|
| 86 |
+
.circles li:nth-child(4) {
|
| 87 |
+
left: 40%;
|
| 88 |
+
width: 60px;
|
| 89 |
+
height: 60px;
|
| 90 |
+
animation-delay: 0s;
|
| 91 |
+
animation-duration: 18s;
|
| 92 |
+
}
|
| 93 |
+
|
| 94 |
+
.circles li:nth-child(5) {
|
| 95 |
+
left: 65%;
|
| 96 |
+
width: 20px;
|
| 97 |
+
height: 20px;
|
| 98 |
+
animation-delay: 0s;
|
| 99 |
+
}
|
| 100 |
+
|
| 101 |
+
.circles li:nth-child(6) {
|
| 102 |
+
left: 75%;
|
| 103 |
+
width: 110px;
|
| 104 |
+
height: 110px;
|
| 105 |
+
animation-delay: 3s;
|
| 106 |
+
}
|
| 107 |
+
|
| 108 |
+
.circles li:nth-child(7) {
|
| 109 |
+
left: 35%;
|
| 110 |
+
width: 150px;
|
| 111 |
+
height: 150px;
|
| 112 |
+
animation-delay: 7s;
|
| 113 |
+
}
|
| 114 |
+
|
| 115 |
+
.circles li:nth-child(8) {
|
| 116 |
+
left: 50%;
|
| 117 |
+
width: 25px;
|
| 118 |
+
height: 25px;
|
| 119 |
+
animation-delay: 15s;
|
| 120 |
+
animation-duration: 45s;
|
| 121 |
+
}
|
| 122 |
+
|
| 123 |
+
.circles li:nth-child(9) {
|
| 124 |
+
left: 20%;
|
| 125 |
+
width: 15px;
|
| 126 |
+
height: 15px;
|
| 127 |
+
animation-delay: 2s;
|
| 128 |
+
animation-duration: 35s;
|
| 129 |
+
}
|
| 130 |
+
|
| 131 |
+
.circles li:nth-child(10) {
|
| 132 |
+
left: 85%;
|
| 133 |
+
width: 150px;
|
| 134 |
+
height: 150px;
|
| 135 |
+
animation-delay: 0s;
|
| 136 |
+
animation-duration: 11s;
|
| 137 |
+
}
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
@keyframes animate {
|
| 142 |
+
|
| 143 |
+
0% {
|
| 144 |
+
transform: translateY(0) rotate(0deg);
|
| 145 |
+
opacity: 1;
|
| 146 |
+
border-radius: 0;
|
| 147 |
+
}
|
| 148 |
+
|
| 149 |
+
100% {
|
| 150 |
+
transform: translateY(-1000px) rotate(720deg);
|
| 151 |
+
opacity: 0;
|
| 152 |
+
border-radius: 50%;
|
| 153 |
+
}
|
| 154 |
+
|
| 155 |
+
}
|
| 156 |
+
|
| 157 |
+
.context {
|
| 158 |
+
text-align: center;
|
| 159 |
+
color: #fff;
|
| 160 |
+
}
|
| 161 |
+
|
| 162 |
+
.button-container {
|
| 163 |
+
margin-top: 50px;
|
| 164 |
+
}
|
| 165 |
+
|
| 166 |
+
.register-button {
|
| 167 |
+
display: inline-block;
|
| 168 |
+
padding: 10px 20px;
|
| 169 |
+
background-color: transparent;
|
| 170 |
+
border: 2px solid #fff;
|
| 171 |
+
color: #fff;
|
| 172 |
+
text-decoration: none;
|
| 173 |
+
font-size: 18px;
|
| 174 |
+
border-radius: 15px;
|
| 175 |
+
transition: background-color 0.3s, color 0.3s;
|
| 176 |
+
}
|
| 177 |
+
|
| 178 |
+
.register-button:hover {
|
| 179 |
+
transform: scale(1.09);
|
| 180 |
+
}
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
/* Responsive Design */
|
| 184 |
+
|
| 185 |
+
/* For Mobile Devices */
|
| 186 |
+
@media (max-width: 767px) {
|
| 187 |
+
.context h1 {
|
| 188 |
+
font-size: 80px;
|
| 189 |
+
/* Adjust the font size for smaller screens */
|
| 190 |
+
}
|
| 191 |
+
|
| 192 |
+
.context h3 {
|
| 193 |
+
font-size: 20px;
|
| 194 |
+
/* Adjust the font size for smaller screens */
|
| 195 |
+
}
|
| 196 |
+
}
|
| 197 |
+
|
| 198 |
+
/* For Tablet Devices */
|
| 199 |
+
@media (min-width: 768px) and (max-width: 1023px) {
|
| 200 |
+
.context h1 {
|
| 201 |
+
font-size: 120px;
|
| 202 |
+
/* Adjust the font size for tablet screens */
|
| 203 |
+
}
|
| 204 |
+
|
| 205 |
+
.context h3 {
|
| 206 |
+
font-size: 25px;
|
| 207 |
+
/* Adjust the font size for tablet screens */
|
| 208 |
+
}
|
| 209 |
+
}
|
static/js/scripts.js
ADDED
|
File without changes
|
templates/index.html
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
|
| 4 |
+
<head>
|
| 5 |
+
<meta charset="UTF-8">
|
| 6 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 7 |
+
<title>Shivang - Flask api</title>
|
| 8 |
+
<link rel="icon" href="/static/images/logo.ico" type="image/x-icon">
|
| 9 |
+
<link rel="stylesheet" href="/static/css/styles.css">
|
| 10 |
+
<link href="https://fonts.googleapis.com/css?family=Exo:400,700" rel="stylesheet">
|
| 11 |
+
</head>
|
| 12 |
+
|
| 13 |
+
<body>
|
| 14 |
+
<div class="area">
|
| 15 |
+
<div class="circles">
|
| 16 |
+
<ul>
|
| 17 |
+
<li></li>
|
| 18 |
+
<li></li>
|
| 19 |
+
<li></li>
|
| 20 |
+
<li></li>
|
| 21 |
+
<li></li>
|
| 22 |
+
<li></li>
|
| 23 |
+
<li></li>
|
| 24 |
+
<li></li>
|
| 25 |
+
<li></li>
|
| 26 |
+
<li></li>
|
| 27 |
+
</ul>
|
| 28 |
+
</div>
|
| 29 |
+
</div>
|
| 30 |
+
<div class="context">
|
| 31 |
+
<h1>Flask Api calls</h1>
|
| 32 |
+
<h4>By Shivang sinha</h4>
|
| 33 |
+
<ul>
|
| 34 |
+
<li>/api/query</li>
|
| 35 |
+
<li>/upload</li>
|
| 36 |
+
</ul>
|
| 37 |
+
</div>
|
| 38 |
+
</body>
|
| 39 |
+
|
| 40 |
+
</html>
|
uploads/employee_handbook_print_1.pdf
ADDED
|
Binary file (649 kB). View file
|
|
|
vercel.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"version": 2,
|
| 3 |
+
"builds": [
|
| 4 |
+
{"src": "app.py", "use": "@vercel/python"}
|
| 5 |
+
],
|
| 6 |
+
"routes": [
|
| 7 |
+
{"src": "/(.*)", "dest": "app.py"}
|
| 8 |
+
]
|
| 9 |
+
}
|