Spaces:
Runtime error
Runtime error
ddovidovich committed on
Commit ·
eac2263
1
Parent(s): 57eab27
Update app.py
Browse files
app.py
CHANGED
|
@@ -8,6 +8,8 @@ os.system('apt-get update -y')
|
|
| 8 |
os.system('apt-get install tesseract-ocr -y')
|
| 9 |
os.system('pip install -q pytesseract')
|
| 10 |
|
|
|
|
|
|
|
| 11 |
from langchain.document_loaders import PyPDFLoader
|
| 12 |
from langchain.vectorstores import Chroma
|
| 13 |
from langchain.embeddings.openai import OpenAIEmbeddings
|
|
@@ -20,7 +22,27 @@ from datetime import datetime
|
|
| 20 |
st.subheader("Upload CV in PDF or image format")
|
| 21 |
uploaded_file = st.file_uploader("Upload PDF or Images", type=["pdf","png","jpg","jpeg"])
|
| 22 |
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
st.write("extension: ", file_extension)
|
| 26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
os.system('apt-get install tesseract-ocr -y')
|
| 9 |
os.system('pip install -q pytesseract')
|
| 10 |
|
| 11 |
+
os.environ["OPENAI_API_KEY"] = os.getenv("SECRET_KEY")
|
| 12 |
+
|
| 13 |
from langchain.document_loaders import PyPDFLoader
|
| 14 |
from langchain.vectorstores import Chroma
|
| 15 |
from langchain.embeddings.openai import OpenAIEmbeddings
|
|
|
|
| 22 |
st.subheader("Upload CV in PDF or image format")
|
| 23 |
uploaded_file = st.file_uploader("Upload PDF or Images", type=["pdf","png","jpg","jpeg"])
|
| 24 |
|
| 25 |
+
nltk.download('punkt')
|
| 26 |
+
nltk.download('averaged_perceptron_tagger')
|
|
|
|
| 27 |
|
| 28 |
+
if uploaded_file:
|
| 29 |
+
file_name, file_extension = os.path.splitext(uploaded_file.name)
|
| 30 |
+
if file_extension != '.pdf':
|
| 31 |
+
uploaded_image = Image.open(uploaded_file)
|
| 32 |
+
img = uploaded_image.convert('RGB')
|
| 33 |
+
loader = UnstructuredPDFLoader(img)
|
| 34 |
+
#img.save(r'/content/CV.pdf')
|
| 35 |
+
#PDFFileName = '/content/CV.pdf'
|
| 36 |
+
else:
|
| 37 |
+
loader = UnstructuredPDFLoader(uploaded_file)
|
| 38 |
+
|
| 39 |
+
pages = loader.load_and_split()
|
| 40 |
+
embeddings = OpenAIEmbeddings()
|
| 41 |
+
docsearch = Chroma.from_documents(pages, embeddings).as_retriever()
|
| 42 |
+
|
| 43 |
+
current_date = datetime.now()
|
| 44 |
+
query = "Output informatio, (all in English), from the document in JSON format: full name, age, languages, education, school, places of work, skills.If the date of birth is not indicated, then please calculate the approximate age of the candidate based on the information provided in the document, for calculations, take into account that graduation from the university is usually at 22 years old. Current date = "+ current_date.date().strftime('%Y-%m-%d')
|
| 45 |
+
docs = docsearch.get_relevant_documents(query)
|
| 46 |
+
chain = load_qa_chain(ChatOpenAI(temperature=0), chain_type="stuff")
|
| 47 |
+
output = chain.run(input_documents=docs, question=query)
|
| 48 |
+
st.write(output)
|