ziadsameh32 commited on
Commit
7928652
·
1 Parent(s): 1bd8c3f
modules/__init__.py CHANGED
@@ -32,6 +32,8 @@ from .inputs import (
32
  # objectives,
33
  )
34
 
 
 
35
  # from .directory import (
36
  # output_dir,
37
  # keywords_dir,
 
32
  # objectives,
33
  )
34
 
35
# Re-export the Serper key-pool helpers so routers can import them from `modules`.
from .serper_env import SERPER_API_KEYS, get_valid_serper_key, get_serper_credits
36
+
37
  # from .directory import (
38
  # output_dir,
39
  # keywords_dir,
modules/serper_env.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Serper API key pool: environment-backed configuration and rotation state."""
import http.client
import json
import os

# NOTE(review): the original also imported `requests`, but nothing in this
# 40-line module uses it — removed as dead weight (only http.client is used).

# Key pool read from the environment; an empty string marks an unconfigured slot
# and is skipped by get_valid_serper_key().
SERPER_API_KEYS = [
    os.getenv("SERPER_API_KEY_1", ""),
    os.getenv("SERPER_API_KEY_2", ""),
    os.getenv("SERPER_API_KEY_3", ""),
]

# Index of the key most recently selected by get_valid_serper_key().
CURRENT_SERPER_INDEX = 0

def get_serper_credits(api_key: str) -> int:
    """Return the remaining credit balance for a Serper API key.

    Queries ``GET https://google.serper.dev/credits`` with the key in the
    ``X-API-KEY`` header. Best-effort by design: any network or parse
    failure is reported as 0 credits so callers treat the key as exhausted
    instead of crashing.

    Args:
        api_key: Serper API key to check.

    Returns:
        Remaining credits, or 0 on any failure.
    """
    # Timeout added so a stalled endpoint cannot hang the caller forever.
    conn = http.client.HTTPSConnection("google.serper.dev", timeout=10)
    try:
        conn.request("GET", "/credits", headers={"X-API-KEY": api_key})
        res = conn.getresponse()
        data = json.loads(res.read().decode("utf-8"))
        return int(data.get("credits", 0))
    except (OSError, http.client.HTTPException, ValueError, TypeError):
        # Narrowed from a bare `except:` which also swallowed
        # KeyboardInterrupt/SystemExit. Deliberate best-effort: return 0.
        return 0
    finally:
        conn.close()  # the original leaked the connection on every call


def get_valid_serper_key(min_credits=100):
    """Return the first configured Serper key holding at least *min_credits*.

    Scans ``SERPER_API_KEYS`` in slot order, skipping empty (unconfigured)
    slots, probing each key's live balance via ``get_serper_credits``. The
    winning slot is remembered in the module-level ``CURRENT_SERPER_INDEX``.

    Args:
        min_credits: minimum acceptable remaining balance (default 100).

    Returns:
        Tuple of ``(api_key, remaining_credits, key_index)``.

    Raises:
        Exception: when no configured key has enough credits left.
    """
    global CURRENT_SERPER_INDEX

    configured = ((slot, key) for slot, key in enumerate(SERPER_API_KEYS) if key)
    for slot, candidate in configured:
        remaining = get_serper_credits(candidate)
        if remaining >= min_credits:
            CURRENT_SERPER_INDEX = slot
            return candidate, remaining, CURRENT_SERPER_INDEX

    raise Exception(
        "❌ SERPER credits exhausted on all APIs. Please contact the developer. "
        "❌ يجب التواصل مع المطور لإضافة API Keys جديدة لSERPER"
    )
routers/content_generation_route.py CHANGED
@@ -15,6 +15,8 @@ from core import generate_course_content, convert_dict_to_html
15
  from modules import (
16
  llm,
17
  inputs,
 
 
18
  )
19
  from tools import scrape_course
20
  from schemas import (
@@ -45,21 +47,37 @@ def run_keywords(outlines: CourseContent):
45
 
46
 
47
  @router.post("/scrape")
48
- def scrape_endpoint(course:CourseWithQueries):
49
  try:
50
- # نحول Pydantic model لـ dict بنفس الشكل اللي الفانكشن مستنياها
 
 
 
 
 
51
  course_dict = course.dict()
52
  summary, raw = scrape_course(course_dict)
 
 
 
 
53
  return {
54
  "message": "Queries Searched and Scraped Well 🚀",
 
 
 
 
 
 
55
  "summary": summary,
56
  "raw": raw,
57
  }
58
 
59
- return {"message": "Queries Searched and Scraped Well 🚀", "summary": summary, "raw": raw}
60
  except Exception as e:
61
- # في البروداكشن خليه logging و متطلعش الرسالة الخام للعميل
62
- raise HTTPException(status_code=500, detail=str(e))
 
 
63
 
64
 
65
  @router.post("/content_generation")
 
15
  from modules import (
16
  llm,
17
  inputs,
18
+ get_valid_serper_key,
19
+ get_serper_credits,
20
  )
21
  from tools import scrape_course
22
  from schemas import (
 
47
 
48
 
49
@router.post("/scrape")
def scrape_endpoint(course: CourseWithQueries):
    """Search + scrape a course's queries using a Serper key that has credits.

    Selects a key with at least 100 credits, runs the scrape, then re-reads
    the balance so the response can report how many credits the run consumed.
    """
    try:
        # 1. Pick an API key slot that still has enough credits.
        api_key, credits_before, serper_index = get_valid_serper_key(
            min_credits=100
        )

        # 2. Run the scraping pass.
        course_dict = course.dict()
        # BUG FIX: the new scrape_course(course, CURRENT_SERPER_INDEX)
        # signature requires the key index; the original call omitted it,
        # raising TypeError on every request.
        summary, raw = scrape_course(course_dict, serper_index)

        # 3. Re-read the balance to report this run's consumption.
        credits_after = get_serper_credits(api_key)

        return {
            "message": "Queries Searched and Scraped Well 🚀",
            "serper": {
                "api_index": serper_index,
                "credits_before": credits_before,
                "credits_after": credits_after,
                "credits_used": credits_before - credits_after,
            },
            "summary": summary,
            "raw": raw,
        }
    except Exception as e:
        # Surface failures as HTTP 500 rather than a 200 response carrying an
        # "error" dict — clients and monitoring must see the failure status.
        # (In production, log the exception instead of echoing it raw.)
        raise HTTPException(status_code=500, detail=str(e))
81
 
82
 
83
  @router.post("/content_generation")
tools/searching_scraping_queries.py CHANGED
@@ -8,11 +8,11 @@ import requests
8
  from bs4 import BeautifulSoup
9
  from PyPDF2 import PdfReader
10
  from concurrent.futures import ThreadPoolExecutor
11
-
12
  # -------- CONFIG ----------
13
  import os
14
 
15
- SERPER_API_KEY = os.getenv("SERPER_API_KEY", "")
16
  BAD_KEYWORDS = [
17
  "facebook",
18
  "youtube",
@@ -88,19 +88,21 @@ def scrape_page(url, retries=RETRIES, limit=SCRAPE_CHAR_LIMIT):
88
  return ""
89
 
90
 
91
- def search_serper(query, max_results=SERP_MAX_RESULTS):
92
- if not SERPER_API_KEY:
93
- # في البروداكشن خليه raise Exception أحسن
94
  return []
95
 
96
  try:
97
  conn = http.client.HTTPSConnection("google.serper.dev")
98
  payload = json.dumps({"q": query, "page": 1})
99
- headers = {"X-API-KEY": SERPER_API_KEY, "Content-Type": "application/json"}
 
 
 
100
  conn.request("POST", "/search", payload, headers)
101
  res = conn.getresponse()
102
- data = res.read()
103
- response_json = json.loads(data.decode("utf-8"))
104
  except:
105
  return []
106
 
@@ -157,7 +159,7 @@ def safe_append(summary_result, raw_result, final_output_summary, final_output_r
157
  )
158
 
159
 
160
- def scrape_course(course: dict):
161
  """
162
  course هنا هو نفس الستركشر اللي كان جاي من JSON file
163
  ويرجع dicts: (final_output_summary, final_output_raw)
@@ -217,7 +219,7 @@ def scrape_course(course: dict):
217
  print(
218
  f"\n🔍 [SEARCH] U{unit_idx} T{topic_idx} S{sub_idx} — Query: {query_text}"
219
  )
220
- results = search_serper(query_text, max_results=SERP_MAX_RESULTS)
221
 
222
  for r in results:
223
  with lock:
 
8
  from bs4 import BeautifulSoup
9
  from PyPDF2 import PdfReader
10
  from concurrent.futures import ThreadPoolExecutor
11
+ from modules import SERPER_API_KEYS
12
  # -------- CONFIG ----------
13
  import os
14
 
15
+ # SERPER_API_KEY = os.getenv("SERPER_API_KEY", "")
16
  BAD_KEYWORDS = [
17
  "facebook",
18
  "youtube",
 
88
  return ""
89
 
90
 
91
+ def search_serper(query, CURRENT_SERPER_INDEX, max_results=SERP_MAX_RESULTS):
92
+ api_key = SERPER_API_KEYS[CURRENT_SERPER_INDEX]
93
+ if not api_key:
94
  return []
95
 
96
  try:
97
  conn = http.client.HTTPSConnection("google.serper.dev")
98
  payload = json.dumps({"q": query, "page": 1})
99
+ headers = {
100
+ "X-API-KEY": api_key,
101
+ "Content-Type": "application/json",
102
+ }
103
  conn.request("POST", "/search", payload, headers)
104
  res = conn.getresponse()
105
+ response_json = json.loads(res.read().decode("utf-8"))
 
106
  except:
107
  return []
108
 
 
159
  )
160
 
161
 
162
+ def scrape_course(course: dict,CURRENT_SERPER_INDEX):
163
  """
164
  course هنا هو نفس الستركشر اللي كان جاي من JSON file
165
  ويرجع dicts: (final_output_summary, final_output_raw)
 
219
  print(
220
  f"\n🔍 [SEARCH] U{unit_idx} T{topic_idx} S{sub_idx} — Query: {query_text}"
221
  )
222
+ results = search_serper(query_text, CURRENT_SERPER_INDEX, max_results=SERP_MAX_RESULTS)
223
 
224
  for r in results:
225
  with lock: