ziadsameh32 commited on
Commit
2adad77
·
1 Parent(s): e976f64

Initial FastAPI CrewAI setup

Browse files
rag/automation_metadata.py CHANGED
@@ -1,7 +1,13 @@
1
  # rag/automation_metadata.py
2
  from typing import Dict, Any, List, Optional
3
 
4
- from core.books.storage import fetch_pending_raw_docs, mark_raw_status, upsert_document_metadata
 
 
 
 
 
 
5
  from agents.books.apa_agent import run_metadata_agent # نفس اللي عندك
6
  from schemas.books.sources_schema import SourceForAgent, DocMetadata
7
 
 
1
  # rag/automation_metadata.py
2
  from typing import Dict, Any, List, Optional
3
 
4
4
+ from core.books.storage import fetch_pending_raw_docs, mark_raw_status, upsert_document_metadata
11
  from agents.books.apa_agent import run_metadata_agent # نفس اللي عندك
12
  from schemas.books.sources_schema import SourceForAgent, DocMetadata
13
 
requirements.txt CHANGED
@@ -10,10 +10,10 @@ crewai[google-genai]
10
  langchain
11
  langchain_core
12
  langchain-community
13
- crawlee==1.1.0
14
  fitz
15
  apify-fingerprint-datapoints
16
- browserforge
17
  frontend
18
  parsel
19
  mistralai
@@ -22,4 +22,6 @@ sentence-transformers
22
  qdrant-client
23
  pymupdf
24
  rapidfuzz
25
- supabase
 
 
 
10
  langchain
11
  langchain_core
12
  langchain-community
13
+ # crawlee==1.1.0
14
  fitz
15
  apify-fingerprint-datapoints
16
+ # browserforge
17
  frontend
18
  parsel
19
  mistralai
 
22
  qdrant-client
23
  pymupdf
24
  rapidfuzz
25
+ supabase
26
+ crawlee==0.3.6
27
+ browserforge==1.1.2
tools/__init__.py CHANGED
@@ -2,13 +2,17 @@
2
  # from .scraper import web_scraping_tool
3
  # from .tavily import search_engine_tool, is_recent
4
  # from .scraper.scraper_crawlee import WebScrapingCrawleeTool
5
- from .scraper.scraper_bs4 import WebScrapingToolBS4
6
- from .scraper.scraper_built_in import pdf_tool, scraping_tool
7
- from .validate_url import URLValidatorTool
8
- from .scraper.no_agent.pdf_extractor import extract_pdf_content
9
- from .scraper.no_agent.bs4_scraper import scrape_with_bs4
10
- from .scraper.no_agent.crawlee_parse_scraper import crawl_parse_url
11
- from .scraper.no_agent.crawlee_bs_scraper import crawl_bs_url
12
- from .searching_scraping_queries import scrape_course, SerperExhaustedError,CURRENT_SERPER_INDEX
 
 
 
 
13
  from .quiz_runner import generate_quiz_for_course
14
- from .ocr__units_service import extract_text_from_pdf
 
2
  # from .scraper import web_scraping_tool
3
  # from .tavily import search_engine_tool, is_recent
4
  # from .scraper.scraper_crawlee import WebScrapingCrawleeTool
5
+ # from .scraper.scraper_bs4 import WebScrapingToolBS4
6
+ # from .scraper.scraper_built_in import pdf_tool, scraping_tool
7
+ # from .validate_url import URLValidatorTool
8
+ # from .scraper.no_agent.pdf_extractor import extract_pdf_content
9
+ # from .scraper.no_agent.bs4_scraper import scrape_with_bs4
10
+ # from .scraper.no_agent.crawlee_parse_scraper import crawl_parse_url
11
+ # from .scraper.no_agent.crawlee_bs_scraper import crawl_bs_url
12
+ from .searching_scraping_queries import (
13
+ scrape_course,
14
+ SerperExhaustedError,
15
+ CURRENT_SERPER_INDEX,
16
+ )
17
  from .quiz_runner import generate_quiz_for_course
18
+ from .ocr__units_service import extract_text_from_pdf