Spaces: Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,7 +5,7 @@ import fitz
|
|
| 5 |
from dotenv import load_dotenv
|
| 6 |
from langchain_community.document_loaders import UnstructuredPDFLoader
|
| 7 |
from langchain_community.vectorstores import FAISS
|
| 8 |
-
from
|
| 9 |
from langchain_text_splitters import CharacterTextSplitter
|
| 10 |
from langchain_groq import ChatGroq
|
| 11 |
from langchain.memory import ConversationBufferMemory
|
|
@@ -15,52 +15,15 @@ import sys
|
|
| 15 |
import pytesseract
|
| 16 |
from pdf2image import convert_from_path
|
| 17 |
|
| 18 |
-
def check_installation(command):
|
| 19 |
-
try:
|
| 20 |
-
result = subprocess.run([command, '--version'], capture_output=True, text=True)
|
| 21 |
-
return result.returncode == 0, result.stdout
|
| 22 |
-
except FileNotFoundError:
|
| 23 |
-
return False, f"{command} not found"
|
| 24 |
-
|
| 25 |
-
def check_dependencies():
|
| 26 |
-
dependencies = {
|
| 27 |
-
'tesseract': '/usr/bin/tesseract',
|
| 28 |
-
'pdftoppm': '/usr/bin/pdftoppm', # Part of poppler-utils
|
| 29 |
-
}
|
| 30 |
-
|
| 31 |
-
status = {}
|
| 32 |
-
for dep, path in dependencies.items():
|
| 33 |
-
installed, version = check_installation(path)
|
| 34 |
-
status[dep] = {
|
| 35 |
-
'installed': installed,
|
| 36 |
-
'path': path,
|
| 37 |
-
'version': version if installed else 'Not found'
|
| 38 |
-
}
|
| 39 |
-
|
| 40 |
-
return status
|
| 41 |
-
|
| 42 |
-
def log_dependency_status(status):
|
| 43 |
-
print("Dependency Status:")
|
| 44 |
-
for dep, info in status.items():
|
| 45 |
-
print(f"{dep}:")
|
| 46 |
-
print(f" Installed: {info['installed']}")
|
| 47 |
-
print(f" Path: {info['path']}")
|
| 48 |
-
print(f" Version: {info['version']}")
|
| 49 |
-
print("\nEnvironment Variables:")
|
| 50 |
-
for key, value in os.environ.items():
|
| 51 |
-
if 'PATH' in key or 'PYTHONPATH' in key:
|
| 52 |
-
print(f"{key}: {value}")
|
| 53 |
-
|
| 54 |
-
# Run dependency check
|
| 55 |
-
dependency_status = check_dependencies()
|
| 56 |
-
log_dependency_status(dependency_status)
|
| 57 |
|
| 58 |
# Load environment variables
|
| 59 |
load_dotenv()
|
| 60 |
secret_key = os.getenv("GROQ_API_KEY")
|
| 61 |
|
| 62 |
os.environ["GROQ_API_KEY"] = secret_key
|
|
|
|
| 63 |
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2")
|
|
|
|
| 64 |
# Ensure the necessary folders exist
|
| 65 |
UPLOAD_FOLDER = 'uploads/'
|
| 66 |
AUDIO_FOLDER = 'audio/'
|
|
@@ -83,6 +46,7 @@ def prepare_vectorstore(data):
|
|
| 83 |
texts = data
|
| 84 |
vectorstore = FAISS.from_texts(texts, embeddings)
|
| 85 |
vectorstore.save_local("faiss_index")
|
|
|
|
| 86 |
return vectorstore
|
| 87 |
|
| 88 |
def load_vectorstore():
|
|
|
|
| 5 |
from dotenv import load_dotenv
|
| 6 |
from langchain_community.document_loaders import UnstructuredPDFLoader
|
| 7 |
from langchain_community.vectorstores import FAISS
|
| 8 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
| 9 |
from langchain_text_splitters import CharacterTextSplitter
|
| 10 |
from langchain_groq import ChatGroq
|
| 11 |
from langchain.memory import ConversationBufferMemory
|
|
|
|
| 15 |
import pytesseract
|
| 16 |
from pdf2image import convert_from_path
|
| 17 |
|
| 18 |
|
| 19 |
# Load environment variables
|
| 20 |
load_dotenv()
|
| 21 |
secret_key = os.getenv("GROQ_API_KEY")
|
| 22 |
|
| 23 |
os.environ["GROQ_API_KEY"] = secret_key
|
| 24 |
+
|
| 25 |
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2")
|
| 26 |
+
|
| 27 |
# Ensure the necessary folders exist
|
| 28 |
UPLOAD_FOLDER = 'uploads/'
|
| 29 |
AUDIO_FOLDER = 'audio/'
|
|
|
|
| 46 |
texts = data
|
| 47 |
vectorstore = FAISS.from_texts(texts, embeddings)
|
| 48 |
vectorstore.save_local("faiss_index")
|
| 49 |
+
|
| 50 |
return vectorstore
|
| 51 |
|
| 52 |
def load_vectorstore():
|