Spaces:
Running
Running
Commit ·
c43db9b
1
Parent(s): d1ec696
Fixed WebScraper and standardized URLs
Browse files- app/core/settings.py +1 -1
- app/memory/cache.py +2 -1
- app/memory/database.py +6 -4
- app/worker/celery_app.py +25 -0
- app/worker/tasks.py +22 -0
- check_redis.py +18 -0
- debug_celery_worker.py +49 -0
- frontend/Dockerfile +33 -0
- frontend/app.py +9 -9
- frontend/firebase_utils.py +60 -0
- test_scraper_local.py +33 -0
app/core/settings.py
CHANGED
|
@@ -24,7 +24,7 @@ class Settings(BaseSettings):
|
|
| 24 |
|
| 25 |
# API Config
|
| 26 |
API_HOST: str = "0.0.0.0"
|
| 27 |
-
|
| 28 |
LOG_LEVEL: str = "INFO"
|
| 29 |
TIMEOUT_SECONDS: int = 120
|
| 30 |
|
|
|
|
| 24 |
|
| 25 |
# API Config
|
| 26 |
API_HOST: str = "0.0.0.0"
|
| 27 |
+
PORT: int = 8000 # Standard Render/Cloud Run env var
|
| 28 |
LOG_LEVEL: str = "INFO"
|
| 29 |
TIMEOUT_SECONDS: int = 120
|
| 30 |
|
app/memory/cache.py
CHANGED
|
@@ -2,6 +2,7 @@ import json
|
|
| 2 |
import logging
|
| 3 |
import os
|
| 4 |
from typing import Any, Dict, Optional
|
|
|
|
| 5 |
|
| 6 |
import redis
|
| 7 |
from redis.exceptions import RedisError
|
|
@@ -23,7 +24,7 @@ class CacheManager:
|
|
| 23 |
redis_url: Redis connection string (used if pool not provided).
|
| 24 |
connection_pool: Existing Redis connection pool.
|
| 25 |
"""
|
| 26 |
-
self.redis_url = redis_url or
|
| 27 |
self.redis_client = None
|
| 28 |
|
| 29 |
try:
|
|
|
|
| 2 |
import logging
|
| 3 |
import os
|
| 4 |
from typing import Any, Dict, Optional
|
| 5 |
+
from app.core.settings import settings
|
| 6 |
|
| 7 |
import redis
|
| 8 |
from redis.exceptions import RedisError
|
|
|
|
| 24 |
redis_url: Redis connection string (used if pool not provided).
|
| 25 |
connection_pool: Existing Redis connection pool.
|
| 26 |
"""
|
| 27 |
+
self.redis_url = redis_url or settings.REDIS_URL
|
| 28 |
self.redis_client = None
|
| 29 |
|
| 30 |
try:
|
app/memory/database.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
import logging
|
| 2 |
import os
|
| 3 |
-
from datetime import datetime, timezone
|
| 4 |
from typing import Any, Dict, Optional, List
|
|
|
|
| 5 |
|
| 6 |
import pymongo
|
| 7 |
from pymongo import IndexModel, ASCENDING
|
|
@@ -24,7 +24,7 @@ class DatabaseManager:
|
|
| 24 |
mongo_uri: MongoDB connection string.
|
| 25 |
client: Existing PyMongo client (shared pool).
|
| 26 |
"""
|
| 27 |
-
self.mongo_uri = mongo_uri or
|
| 28 |
self.client = None
|
| 29 |
self.db = None
|
| 30 |
self.collection = None
|
|
@@ -45,9 +45,11 @@ class DatabaseManager:
|
|
| 45 |
self.client.server_info()
|
| 46 |
|
| 47 |
# Setup DB and collection
|
| 48 |
-
db_name =
|
| 49 |
try:
|
| 50 |
-
|
|
|
|
|
|
|
| 51 |
if uri_db:
|
| 52 |
db_name = uri_db
|
| 53 |
except Exception:
|
|
|
|
| 1 |
import logging
|
| 2 |
import os
|
|
|
|
| 3 |
from typing import Any, Dict, Optional, List
|
| 4 |
+
from app.core.settings import settings
|
| 5 |
|
| 6 |
import pymongo
|
| 7 |
from pymongo import IndexModel, ASCENDING
|
|
|
|
| 24 |
mongo_uri: MongoDB connection string.
|
| 25 |
client: Existing PyMongo client (shared pool).
|
| 26 |
"""
|
| 27 |
+
self.mongo_uri = mongo_uri or settings.MONGO_URI
|
| 28 |
self.client = None
|
| 29 |
self.db = None
|
| 30 |
self.collection = None
|
|
|
|
| 45 |
self.client.server_info()
|
| 46 |
|
| 47 |
# Setup DB and collection
|
| 48 |
+
db_name = settings.MONGO_DB_NAME
|
| 49 |
try:
|
| 50 |
+
# If URI contains a DB name, it will override the setting default
|
| 51 |
+
parsed_uri = pymongo.uri_parser.parse_uri(self.mongo_uri)
|
| 52 |
+
uri_db = parsed_uri.get('database')
|
| 53 |
if uri_db:
|
| 54 |
db_name = uri_db
|
| 55 |
except Exception:
|
app/worker/celery_app.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from celery import Celery
|
| 2 |
+
import os
|
| 3 |
+
from app.core.settings import settings
|
| 4 |
+
|
| 5 |
+
# Initialize Celery
|
| 6 |
+
celery_app = Celery(
|
| 7 |
+
"mathminds",
|
| 8 |
+
broker=settings.REDIS_URL,
|
| 9 |
+
backend=settings.REDIS_URL,
|
| 10 |
+
include=["app.worker.tasks"]
|
| 11 |
+
)
|
| 12 |
+
|
| 13 |
+
# Optional configuration
|
| 14 |
+
celery_app.conf.update(
|
| 15 |
+
task_serializer="json",
|
| 16 |
+
accept_content=["json"],
|
| 17 |
+
result_serializer="json",
|
| 18 |
+
timezone="UTC",
|
| 19 |
+
enable_utc=True,
|
| 20 |
+
task_track_started=True,
|
| 21 |
+
task_time_limit=300, # 5 minutes max
|
| 22 |
+
)
|
| 23 |
+
|
| 24 |
+
if __name__ == "__main__":
|
| 25 |
+
celery_app.start()
|
app/worker/tasks.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
from app.worker.celery_app import celery_app
|
| 3 |
+
from app.tools.web_scraper import run_playwright_sync
|
| 4 |
+
|
| 5 |
+
logger = logging.getLogger(__name__)
|
| 6 |
+
|
| 7 |
+
@celery_app.task(name="app.worker.tasks.scrape_task", bind=True)
def scrape_task(self, query: str, headless: bool = True, extraction_focus: str = None):
    """Run the web scraper for ``query`` as a Celery task.

    Args:
        self: Bound task instance (``bind=True``); used here for the request id.
        query: Forwarded as the first argument to ``run_playwright_sync``.
        headless: Forwarded as the second argument to ``run_playwright_sync``.
        extraction_focus: Forwarded as the third argument; may be ``None``.

    Returns:
        Whatever ``run_playwright_sync`` returns on success. If it raises,
        a dict with the keys ``"source"``, ``"error"`` and ``"status"``
        (``"error"``) is returned instead, so the task does not propagate
        the exception to Celery.
    """
    logger.info("Task %s started for query: %s", self.request.id, query)
    try:
        return run_playwright_sync(query, headless, extraction_focus)
    except Exception as e:
        # Deliberate catch-all at the task boundary: callers receive an
        # error payload rather than a failed task. logger.exception keeps
        # the traceback, which a plain logger.error call would drop.
        logger.exception("Task failed: %s", e)
        return {
            "source": "web_scraper",
            "error": str(e),
            "status": "error",
        }
|
check_redis.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import redis
|
| 2 |
+
import os
|
| 3 |
+
from dotenv import load_dotenv
|
| 4 |
+
|
| 5 |
+
load_dotenv()
|
| 6 |
+
redis_url = os.getenv("REDIS_URL", "redis://localhost:6379/0")
|
| 7 |
+
|
| 8 |
+
def check_redis(timeout: float = 5.0) -> bool:
    """Ping the Redis server at the module-level ``redis_url`` and report status.

    Args:
        timeout: Seconds allowed for connecting and for the PING reply.
            Without a bound, an unreachable host can stall the check for
            as long as the operating system's TCP timeout.

    Returns:
        ``True`` if the server answered the ping, ``False`` otherwise.
    """
    # NOTE(review): redis_url may embed credentials (redis://:password@host);
    # printing it verbatim would expose them in logs -- confirm this is OK.
    print(f"Checking Redis at: {redis_url}")
    try:
        client = redis.from_url(
            redis_url,
            socket_connect_timeout=timeout,
            socket_timeout=timeout,
        )
        client.ping()
    except Exception as e:
        # Broad on purpose: this is a diagnostic script and any failure
        # (bad URL, refused connection, timeout) means "not reachable".
        print(f"❌ Redis is DOWN or unreachable: {e}")
        return False

    print("✅ Redis is UP!")
    return True
|
| 16 |
+
|
| 17 |
+
if __name__ == "__main__":
|
| 18 |
+
check_redis()
|
debug_celery_worker.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
+
import logging
|
| 3 |
+
import sys
|
| 4 |
+
import os
|
| 5 |
+
|
| 6 |
+
# Add the current directory to sys.path so we can import 'app'
|
| 7 |
+
sys.path.append(os.getcwd())
|
| 8 |
+
|
| 9 |
+
from app.worker.tasks import scrape_task
|
| 10 |
+
import time
|
| 11 |
+
|
| 12 |
+
logging.basicConfig(level=logging.INFO)
|
| 13 |
+
logger = logging.getLogger(__name__)
|
| 14 |
+
|
| 15 |
+
async def debug_scrape():
    """Dispatch ``scrape_task`` through Celery and poll until it finishes.

    Sends a fixed query with ``scrape_task.delay``, then checks the result
    every 2 seconds for up to 60 seconds. Prints the task id, the status
    while waiting, and the first 200 characters of the result once ready.
    Any exception raised along the way is printed rather than propagated.
    """
    print("Triggering Celery Scrape Task...")
    query = "gold rate in india today"

    try:
        # Dispatch task
        result = scrape_task.delay(query)
        print(f"Task ID: {result.id}")

        # Wait for result. A monotonic clock is used so that wall-clock
        # adjustments cannot shorten or extend the wait.
        max_wait = 60  # seconds
        deadline = time.monotonic() + max_wait

        while time.monotonic() < deadline:
            if result.ready():
                print("Task Ready!")
                print("Result Status:", result.status)
                # Safely handle potential encoding issues when printing to console
                try:
                    res_content = str(result.result)
                    printable = res_content[:200].encode('ascii', 'ignore').decode('ascii')
                    print("Result Content (partial):", printable)
                except Exception as e:
                    print(f"Result received, but print failed: {e}")
                return

            print(f"Waiting... (status: {result.status})")
            await asyncio.sleep(2)

        print("Task timed out. Is the worker running?")

    except Exception as e:
        print(f"Dispatch failed: {e}")
|
| 47 |
+
|
| 48 |
+
if __name__ == "__main__":
|
| 49 |
+
asyncio.run(debug_scrape())
|
frontend/Dockerfile
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Use official Python runtime as a parent image
|
| 2 |
+
FROM python:3.12-slim
|
| 3 |
+
|
| 4 |
+
# Set environment variables
|
| 5 |
+
ENV PYTHONDONTWRITEBYTECODE=1
|
| 6 |
+
ENV PYTHONUNBUFFERED=1
|
| 7 |
+
ENV PORT=8501
|
| 8 |
+
|
| 9 |
+
# Set working directory
|
| 10 |
+
WORKDIR /app
|
| 11 |
+
|
| 12 |
+
# Install system dependencies (curl for health checks)
|
| 13 |
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 14 |
+
curl \
|
| 15 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 16 |
+
|
| 17 |
+
# Install Python dependencies
|
| 18 |
+
# We reuse the root requirements.txt for simplicity, or we could have a specific one
|
| 19 |
+
COPY requirements.txt .
|
| 20 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 21 |
+
|
| 22 |
+
# Copy the rest of the application
|
| 23 |
+
COPY . .
|
| 24 |
+
|
| 25 |
+
# Create a non-root user and switch to it for security
|
| 26 |
+
RUN useradd -m appuser && chown -R appuser /app
|
| 27 |
+
USER appuser
|
| 28 |
+
|
| 29 |
+
# Expose Streamlit's default port
|
| 30 |
+
EXPOSE 8501
|
| 31 |
+
|
| 32 |
+
# Command to run the Streamlit app
|
| 33 |
+
CMD streamlit run frontend/app.py --server.port=$PORT --server.address=0.0.0.0
|
frontend/app.py
CHANGED
|
@@ -107,8 +107,8 @@ st.markdown("""
|
|
| 107 |
# ====================================================
|
| 108 |
# Config
|
| 109 |
# ====================================================
|
| 110 |
-
|
| 111 |
-
API_URL = f"{
|
| 112 |
|
| 113 |
|
| 114 |
# ====================================================
|
|
@@ -154,7 +154,7 @@ def load_sessions():
|
|
| 154 |
"""Fetch THIS user's chat sessions from the backend and populate state."""
|
| 155 |
try:
|
| 156 |
headers = get_auth_headers()
|
| 157 |
-
response = requests.get(f"{
|
| 158 |
if response.status_code == 200:
|
| 159 |
st.session_state.chat_sessions = response.json()
|
| 160 |
# Mark that we've successfully loaded data for this specific user
|
|
@@ -197,7 +197,7 @@ def load_messages(session_id):
|
|
| 197 |
try:
|
| 198 |
headers = get_auth_headers()
|
| 199 |
response = requests.get(
|
| 200 |
-
f"{
|
| 201 |
headers=headers, timeout=30
|
| 202 |
)
|
| 203 |
if response.status_code == 200:
|
|
@@ -232,7 +232,7 @@ def add_message(role, content, sent_to_api=False, **kwargs):
|
|
| 232 |
def new_chat():
|
| 233 |
try:
|
| 234 |
headers = get_auth_headers()
|
| 235 |
-
response = requests.post(f"{
|
| 236 |
if response.status_code == 200:
|
| 237 |
new_s = response.json()
|
| 238 |
st.session_state.active_session_id = new_s["session_id"]
|
|
@@ -248,7 +248,7 @@ def new_chat():
|
|
| 248 |
def delete_chat(sid):
|
| 249 |
try:
|
| 250 |
headers = get_auth_headers()
|
| 251 |
-
response = requests.delete(f"{
|
| 252 |
if response.status_code == 200:
|
| 253 |
if st.session_state.active_session_id == sid:
|
| 254 |
st.session_state.active_session_id = None
|
|
@@ -265,7 +265,7 @@ def rename_chat(sid, new_title):
|
|
| 265 |
try:
|
| 266 |
headers = get_auth_headers()
|
| 267 |
response = requests.patch(
|
| 268 |
-
f"{
|
| 269 |
headers=headers, json={"title": new_title}, timeout=30
|
| 270 |
)
|
| 271 |
if response.status_code == 200:
|
|
@@ -387,7 +387,7 @@ def profile_interface():
|
|
| 387 |
|
| 388 |
if "profile_data" not in st.session_state:
|
| 389 |
try:
|
| 390 |
-
r = requests.get(f"{
|
| 391 |
st.session_state.profile_data = r.json() if r.status_code == 200 else {}
|
| 392 |
except Exception:
|
| 393 |
st.session_state.profile_data = {}
|
|
@@ -410,7 +410,7 @@ def profile_interface():
|
|
| 410 |
if st.form_submit_button("Save Profile", use_container_width=True, type="primary"):
|
| 411 |
payload = {"display_name": display_name, "math_level": math_level, "interests": interests}
|
| 412 |
try:
|
| 413 |
-
r = requests.post(f"{
|
| 414 |
if r.status_code == 200:
|
| 415 |
st.success("Profile updated!")
|
| 416 |
st.session_state.profile_data = payload
|
|
|
|
| 107 |
# ====================================================
|
| 108 |
# Config
|
| 109 |
# ====================================================
|
| 110 |
+
BACKEND_URL = os.getenv("BACKEND_URL", "http://localhost:8000")
|
| 111 |
+
API_URL = f"{BACKEND_URL}/solve"
|
| 112 |
|
| 113 |
|
| 114 |
# ====================================================
|
|
|
|
| 154 |
"""Fetch THIS user's chat sessions from the backend and populate state."""
|
| 155 |
try:
|
| 156 |
headers = get_auth_headers()
|
| 157 |
+
response = requests.get(f"{BACKEND_URL}/chat/sessions", headers=headers, timeout=30)
|
| 158 |
if response.status_code == 200:
|
| 159 |
st.session_state.chat_sessions = response.json()
|
| 160 |
# Mark that we've successfully loaded data for this specific user
|
|
|
|
| 197 |
try:
|
| 198 |
headers = get_auth_headers()
|
| 199 |
response = requests.get(
|
| 200 |
+
f"{BACKEND_URL}/chat/sessions/{session_id}/messages",
|
| 201 |
headers=headers, timeout=30
|
| 202 |
)
|
| 203 |
if response.status_code == 200:
|
|
|
|
| 232 |
def new_chat():
|
| 233 |
try:
|
| 234 |
headers = get_auth_headers()
|
| 235 |
+
response = requests.post(f"{BACKEND_URL}/chat/sessions", headers=headers, timeout=30)
|
| 236 |
if response.status_code == 200:
|
| 237 |
new_s = response.json()
|
| 238 |
st.session_state.active_session_id = new_s["session_id"]
|
|
|
|
| 248 |
def delete_chat(sid):
|
| 249 |
try:
|
| 250 |
headers = get_auth_headers()
|
| 251 |
+
response = requests.delete(f"{BACKEND_URL}/chat/sessions/{sid}", headers=headers, timeout=30)
|
| 252 |
if response.status_code == 200:
|
| 253 |
if st.session_state.active_session_id == sid:
|
| 254 |
st.session_state.active_session_id = None
|
|
|
|
| 265 |
try:
|
| 266 |
headers = get_auth_headers()
|
| 267 |
response = requests.patch(
|
| 268 |
+
f"{BACKEND_URL}/chat/sessions/{sid}",
|
| 269 |
headers=headers, json={"title": new_title}, timeout=30
|
| 270 |
)
|
| 271 |
if response.status_code == 200:
|
|
|
|
| 387 |
|
| 388 |
if "profile_data" not in st.session_state:
|
| 389 |
try:
|
| 390 |
+
r = requests.get(f"{BACKEND_URL}/users/profile", headers=headers, timeout=30)
|
| 391 |
st.session_state.profile_data = r.json() if r.status_code == 200 else {}
|
| 392 |
except Exception:
|
| 393 |
st.session_state.profile_data = {}
|
|
|
|
| 410 |
if st.form_submit_button("Save Profile", use_container_width=True, type="primary"):
|
| 411 |
payload = {"display_name": display_name, "math_level": math_level, "interests": interests}
|
| 412 |
try:
|
| 413 |
+
r = requests.post(f"{BACKEND_URL}/users/profile", json=payload, headers=headers)
|
| 414 |
if r.status_code == 200:
|
| 415 |
st.success("Profile updated!")
|
| 416 |
st.session_state.profile_data = payload
|
frontend/firebase_utils.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
import os
|
| 3 |
+
from dotenv import load_dotenv
|
| 4 |
+
|
| 5 |
+
load_dotenv()
|
| 6 |
+
|
| 7 |
+
FIREBASE_WEB_API_KEY = os.getenv("FIREBASE_WEB_API_KEY")
|
| 8 |
+
|
| 9 |
+
# Base of the Firebase Auth REST endpoints used below.
_FIREBASE_AUTH_BASE_URL = "https://identitytoolkit.googleapis.com/v1/accounts"
# Upper bound for a single auth request, so a stalled connection cannot
# block the caller indefinitely.
_FIREBASE_TIMEOUT_SECONDS = 30


def _firebase_auth_request(action, email, password):
    """POST email/password credentials to a Firebase Auth REST action.

    Args:
        action: Endpoint action name appended after ``accounts:``
            (``"signInWithPassword"`` or ``"signUp"``).
        email: Account email address.
        password: Account password.

    Returns:
        ``(id_token, local_id, email, error_message)``. On success the
        first three come from the response's ``idToken``, ``localId`` and
        ``email`` fields and ``error_message`` is ``None``; on any failure
        the first three are ``None`` and ``error_message`` is a string.
    """
    if not FIREBASE_WEB_API_KEY:
        return None, None, None, "FIREBASE_WEB_API_KEY is not set in .env"

    url = f"{_FIREBASE_AUTH_BASE_URL}:{action}?key={FIREBASE_WEB_API_KEY}"
    payload = {
        "email": email,
        "password": password,
        "returnSecureToken": True
    }

    try:
        response = requests.post(url, json=payload, timeout=_FIREBASE_TIMEOUT_SECONDS)
        data = response.json()

        if response.status_code == 200:
            return data["idToken"], data["localId"], data["email"], None

        error_msg = data.get("error", {}).get("message", "Unknown error")
        return None, None, None, error_msg
    except Exception as e:
        # Deliberate catch-all: callers rely on receiving an error tuple
        # (network failure, timeout, non-JSON body, missing field) and
        # never an exception.
        return None, None, None, str(e)


def sign_in_with_email(email, password):
    """
    Signs in a user using Firebase Auth REST API.
    Returns (id_token, local_id, email, error_message)
    """
    return _firebase_auth_request("signInWithPassword", email, password)


def sign_up_with_email(email, password):
    """
    Registers a new user using Firebase Auth REST API.
    Returns (id_token, local_id, email, error_message)
    """
    return _firebase_auth_request("signUp", email, password)
|
test_scraper_local.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
import sys
|
| 3 |
+
import os
|
| 4 |
+
|
| 5 |
+
# Add the current directory to sys.path so we can import 'app'
|
| 6 |
+
sys.path.append(os.getcwd())
|
| 7 |
+
|
| 8 |
+
from app.tools.web_scraper import run_playwright_sync
|
| 9 |
+
|
| 10 |
+
logging.basicConfig(level=logging.INFO)
|
| 11 |
+
logger = logging.getLogger(__name__)
|
| 12 |
+
|
| 13 |
+
def test_direct_scrape():
    """Call ``run_playwright_sync`` directly with a fixed query and print the outcome.

    On a result whose ``"status"`` is ``"success"`` the URL, the content
    length and the first 500 characters of the content are printed;
    otherwise the result's ``"error"`` value is printed. Exceptions are
    reported as ``Crashed: ...`` instead of propagating.
    """
    print("--- Testing run_playwright_sync DIRECTLY (Subprocess-safe) ---")
    query = "current gold rate in mumbai"

    try:
        # We run it synchronously as it's designed
        result = run_playwright_sync(query, headless=True)

        print("\n[RESULT]")
        if result.get("status") == "success":
            # Normalise once: a missing or None "content" would otherwise
            # raise TypeError when measured or sliced below.
            content = result.get("content") or ""
            print(f"URL: {result.get('url')}")
            print(f"Content Length: {len(content)}")
            print(f"Sample: {content[:500]}...")
        else:
            print(f"Error: {result.get('error')}")

    except Exception as e:
        print(f"Crashed: {e}")
|
| 31 |
+
|
| 32 |
+
if __name__ == "__main__":
|
| 33 |
+
test_direct_scrape()
|