Spaces:
Sleeping
Sleeping
Create requirements.txt
Browse files- requirements.txt +25 -0
requirements.txt
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Transformers and NLP Libraries
|
| 2 |
+
transformers==4.31.0 # For transformer-based models (AutoTokenizer, AutoModelForSeq2SeqLM)
|
| 3 |
+
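sentence-transformers==2.2.2 # For sentence embeddings (SentenceTransformer). NOTE(review): 2.2.2 imports huggingface_hub.cached_download, which recent huggingface_hub releases removed — confirm the resolved huggingface_hub version still provides it, or pin it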
|
| 4 |
+
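spacy==3.7.1 # For NLP tasks like entity extraction. NOTE(review): the en_core_web_lg-3.7.1 wheel appears to require spacy>=3.7.2,<3.8.0 — confirm this pin resolves, otherwise bump to 3.7.2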
|
| 5 |
+
en_core_web_lg @ https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.7.1/en_core_web_lg-3.7.1-py3-none-any.whl # Large English model for spaCy
|
| 6 |
+
|
| 7 |
+
# Data Processing Libraries
|
| 8 |
+
pandas==2.0.3 # For data manipulation (if needed)
|
| 9 |
+
numpy==1.24.4 # For numerical computations
|
| 10 |
+
scikit-learn==1.3.0 # For TF-IDF vectorization and cosine similarity
|
| 11 |
+
nltk==3.8.1 # For sentence tokenization
|
| 12 |
+
|
| 13 |
+
# Document Parsing Libraries
|
| 14 |
+
PyMuPDF==1.23.5 # For PDF text extraction (fitz)
|
| 15 |
+
python-docx==1.1.0 # For DOCX text extraction
|
| 16 |
+
beautifulsoup4==4.12.2 # For HTML text extraction
|
| 17 |
+
|
| 18 |
+
# Graph-Based Algorithms
|
| 19 |
+
networkx==3.1 # For TextRank and LexRank summarization
|
| 20 |
+
|
| 21 |
+
# Deep Learning Backend (PyTorch)
|
| 22 |
+
torch==2.0.1 # Tensor backend required by transformers and sentence-transformers; uses the GPU when CUDA is available
|
| 23 |
+
|
| 24 |
+
# Regular Expressions
|
| 25 |
+
regex==2023.10.3 # Enhanced regular expression support (only needed for features beyond the standard-library re module)
|