Spaces:
Sleeping
Sleeping
Commit ·
2adad77
1
Parent(s): e976f64
Initial FastAPI CrewAI setup
Browse files
- rag/automation_metadata.py +7 -1
- requirements.txt +5 -3
- tools/__init__.py +13 -9
rag/automation_metadata.py
CHANGED
|
@@ -1,7 +1,13 @@
|
|
| 1 |
# rag/automation_metadata.py
|
| 2 |
from typing import Dict, Any, List, Optional
|
| 3 |
|
| 4 |
-
from core.books.storage import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
from agents.books.apa_agent import run_metadata_agent # نفس اللي عندك
|
| 6 |
from schemas.books.sources_schema import SourceForAgent, DocMetadata
|
| 7 |
|
|
|
|
| 1 |
# rag/automation_metadata.py
|
| 2 |
from typing import Dict, Any, List, Optional
|
| 3 |
|
| 4 |
+
from core.books.storage import fetch_pending
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
_raw_docs, mark_raw_status, upsert_document_metadata
|
| 11 |
from agents.books.apa_agent import run_metadata_agent # نفس اللي عندك
|
| 12 |
from schemas.books.sources_schema import SourceForAgent, DocMetadata
|
| 13 |
|
requirements.txt
CHANGED
|
@@ -10,10 +10,10 @@ crewai[google-genai]
|
|
| 10 |
langchain
|
| 11 |
langchain_core
|
| 12 |
langchain-community
|
| 13 |
-
crawlee==1.1.0
|
| 14 |
fitz
|
| 15 |
apify-fingerprint-datapoints
|
| 16 |
-
browserforge
|
| 17 |
frontend
|
| 18 |
parsel
|
| 19 |
mistralai
|
|
@@ -22,4 +22,6 @@ sentence-transformers
|
|
| 22 |
qdrant-client
|
| 23 |
pymupdf
|
| 24 |
rapidfuzz
|
| 25 |
-
supabase
|
|
|
|
|
|
|
|
|
| 10 |
langchain
|
| 11 |
langchain_core
|
| 12 |
langchain-community
|
| 13 |
+
# crawlee==1.1.0
|
| 14 |
fitz
|
| 15 |
apify-fingerprint-datapoints
|
| 16 |
+
# browserforge
|
| 17 |
frontend
|
| 18 |
parsel
|
| 19 |
mistralai
|
|
|
|
| 22 |
qdrant-client
|
| 23 |
pymupdf
|
| 24 |
rapidfuzz
|
| 25 |
+
supabase
|
| 26 |
+
crawlee==0.3.6
|
| 27 |
+
browserforge==1.1.2
|
tools/__init__.py
CHANGED
|
@@ -2,13 +2,17 @@
|
|
| 2 |
# from .scraper import web_scraping_tool
|
| 3 |
# from .tavily import search_engine_tool, is_recent
|
| 4 |
# from .scraper.scraper_crawlee import WebScrapingCrawleeTool
|
| 5 |
-
from .scraper.scraper_bs4 import WebScrapingToolBS4
|
| 6 |
-
from .scraper.scraper_built_in import pdf_tool, scraping_tool
|
| 7 |
-
from .validate_url import URLValidatorTool
|
| 8 |
-
from .scraper.no_agent.pdf_extractor import extract_pdf_content
|
| 9 |
-
from .scraper.no_agent.bs4_scraper import scrape_with_bs4
|
| 10 |
-
from .scraper.no_agent.crawlee_parse_scraper import crawl_parse_url
|
| 11 |
-
from .scraper.no_agent.crawlee_bs_scraper import crawl_bs_url
|
| 12 |
-
from .searching_scraping_queries import
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
from .quiz_runner import generate_quiz_for_course
|
| 14 |
-
from .ocr__units_service import extract_text_from_pdf
|
|
|
|
| 2 |
# from .scraper import web_scraping_tool
|
| 3 |
# from .tavily import search_engine_tool, is_recent
|
| 4 |
# from .scraper.scraper_crawlee import WebScrapingCrawleeTool
|
| 5 |
+
# from .scraper.scraper_bs4 import WebScrapingToolBS4
|
| 6 |
+
# from .scraper.scraper_built_in import pdf_tool, scraping_tool
|
| 7 |
+
# from .validate_url import URLValidatorTool
|
| 8 |
+
# from .scraper.no_agent.pdf_extractor import extract_pdf_content
|
| 9 |
+
# from .scraper.no_agent.bs4_scraper import scrape_with_bs4
|
| 10 |
+
# from .scraper.no_agent.crawlee_parse_scraper import crawl_parse_url
|
| 11 |
+
# from .scraper.no_agent.crawlee_bs_scraper import crawl_bs_url
|
| 12 |
+
from .searching_scraping_queries import (
|
| 13 |
+
scrape_course,
|
| 14 |
+
SerperExhaustedError,
|
| 15 |
+
CURRENT_SERPER_INDEX,
|
| 16 |
+
)
|
| 17 |
from .quiz_runner import generate_quiz_for_course
|
| 18 |
+
from .ocr__units_service import extract_text_from_pdf
|