Add logging to app.py
Browse files
app.py
CHANGED
|
@@ -1,5 +1,8 @@
|
|
| 1 |
|
|
|
|
| 2 |
import time
|
|
|
|
|
|
|
| 3 |
from selenium import webdriver
|
| 4 |
from selenium.webdriver.common.by import By
|
| 5 |
from selenium.webdriver.common.by import By
|
|
@@ -14,53 +17,70 @@ import gradio as gr
|
|
| 14 |
# from selenium.webdriver.firefox.service import Service
|
| 15 |
# from selenium.webdriver.firefox.options import Options
|
| 16 |
# from selenium.webdriver.common.by import By
|
| 17 |
-
|
| 18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
# Configure environment for matplotlib
|
| 21 |
os.environ['MPLCONFIGDIR'] = tempfile.mkdtemp()
|
|
|
|
| 22 |
|
| 23 |
def setup_selenium():
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
|
| 62 |
def form_input_text(word):
    """Build the chat prompt that asks for example sentences using *word*.

    The prompt requests the answer as a table, without quotes around the
    sentences and without repeated sentences, covering the sentence types
    named at the end of the prompt (Simple and Compound).

    Args:
        word: The word to embed in the prompt; it is interpolated as-is
            between single quotes.

    Returns:
        The complete prompt string.
    """
    prompt = f"use the word '{word}' based on the following sentences types, give output in a table format, no quotes around sentences, no repeating the same sentence: Simple Sentence, Compound Sentence"
    return prompt
|
| 65 |
|
| 66 |
# Complex Sentence, Compound-Complex Sentence, Declarative Sentence, Interrogative Sentence, Imperative Sentence, Exclamatory Sentence, Optative Sentence, Parallel Sentences, Balanced Sentences, Loose Sentence, Periodic Sentence, Elliptical Sentence, Inverted Sentence, Chiasmus, Anaphora, Minor Sentence, Run-on Sentence, Sentence Fragments, Conditional Sentence, Cleft Sentence, Existential Sentence, Tautology, Paradoxical Sentence, Formal Sentence, Colloquial Sentence, Poetic Sentence, Humorous or Ironic Sentence, Everyday Sentence, Technical Sentence, Standard Negation (Using 'not' or contractions), Negative Words (Indefinite pronouns/adverbs), Double Negation (Non-standard or emphatic), Affixal Negation (Prefixes like un-, in-, dis-), Implied Negation (No direct negative word), Negative Questions, Negative Imperatives, Negative interrogative, Active Sentence, Full Passive Sentence, Agentless Passive Sentence, Get-Passive (Informal), Passive with Modal Verbs, Impersonal Passive, Interrogative Passive, Negative Passive, Combined Cases: Negative + Passive, Gerundive Sentences, Indicative (States facts)"
|
|
@@ -69,12 +89,17 @@ def fetch_sentences(input_text):
|
|
| 69 |
driver = None
|
| 70 |
generated_sentences = []
|
| 71 |
try:
|
|
|
|
|
|
|
|
|
|
| 72 |
# Set up the WebDriver (e.g., Chrome)
|
| 73 |
# driver = webdriver.Chrome(options=options) # Ensure ChromeDriver is in PATH
|
| 74 |
# driver = webdriver.Firefox(options=options)
|
| 75 |
|
| 76 |
driver = setup_selenium()
|
| 77 |
driver.implicitly_wait(5)
|
|
|
|
|
|
|
| 78 |
# Open a website
|
| 79 |
driver.get("https://copilot.microsoft.com/chat")
|
| 80 |
print(f"...after: driver.get('https://copilot.microsoft.com/chat')")
|
|
|
|
| 1 |
|
| 2 |
+
import os
|
| 3 |
import time
|
| 4 |
+
import tempfile
|
| 5 |
+
import logging
|
| 6 |
from selenium import webdriver
|
| 7 |
from selenium.webdriver.common.by import By
|
| 8 |
from selenium.webdriver.common.by import By
|
|
|
|
| 17 |
# from selenium.webdriver.firefox.service import Service
|
| 18 |
# from selenium.webdriver.firefox.options import Options
|
| 19 |
# from selenium.webdriver.common.by import By
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
# Logging setup: records at INFO level and above are written both to the
# log file below and to a default StreamHandler, each line carrying its
# timestamp and level name.
logging.basicConfig(
    handlers=[
        logging.FileHandler('/tmp/webscraper.log'),
        logging.StreamHandler(),
    ],
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Point matplotlib at a freshly created temporary config directory through
# the MPLCONFIGDIR environment variable, and record which one was chosen.
os.environ['MPLCONFIGDIR'] = tempfile.mkdtemp()
logger.info(f"Matplotlib config directory set to: {os.environ['MPLCONFIGDIR']}")
|
| 36 |
|
| 37 |
def setup_selenium():
    """Create and return a headless Firefox WebDriver.

    The browser binary is ``/usr/bin/firefox-esr`` and the driver is the
    ``geckodriver`` file in the current working directory, which is marked
    executable before use. geckodriver's own log is written to
    ``geckodriver.log`` in the system temp directory.

    Returns:
        The started ``webdriver.Firefox`` instance. This function does not
        quit it.

    Raises:
        Exception: Any error raised while preparing or starting the driver
            is logged together with its traceback and re-raised unchanged.
    """
    try:
        logger.info("Initializing Selenium Firefox driver...")
        options = Options()
        options.binary_location = '/usr/bin/firefox-esr'

        # "--headless" is passed exactly once as a command-line argument.
        # The original also set the deprecated ``options.headless``
        # attribute and added the same flag twice.
        # NOTE(review): the switches after "--headless" look like Chromium
        # flags; they are kept unchanged to preserve behavior -- confirm
        # whether Firefox needs any of them.
        browser_args = (
            "--headless",
            "--disable-gpu",
            "--no-sandbox",
            "--disable-dev-shm-usage",
            "--start-maximized",
            "--disable-blink-features=AutomationControlled",
            "--disable-notifications",
        )
        for browser_arg in browser_args:
            options.add_argument(browser_arg)

        # Turn off web notifications and override the reported user agent.
        options.set_preference("dom.webnotifications.enabled", False)
        options.set_preference("general.useragent.override", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36")

        # geckodriver is expected in the working directory; make sure it
        # carries the executable bit before handing it to Selenium.
        geckodriver_path = os.path.join(os.getcwd(), 'geckodriver')
        os.chmod(geckodriver_path, 0o755)

        service = Service(
            executable_path=geckodriver_path,
            log_path=os.path.join(tempfile.gettempdir(), 'geckodriver.log'),
        )

        return webdriver.Firefox(options=options, service=service)
    except Exception as e:
        # logger.exception logs at ERROR level and appends the traceback,
        # which the previous logger.error(str(e)) call dropped.
        logger.exception("Failed to initialize Selenium: %s", e)
        raise
|
| 80 |
|
| 81 |
def form_input_text(word):
    """Build the chat prompt that asks for example sentences using *word*.

    Logs the word at INFO level, then returns a prompt requesting a table
    of sentences (no surrounding quotes, no repeats) for the sentence types
    named at the end of the prompt (Simple and Compound).

    Args:
        word: The word to embed in the prompt; it is interpolated as-is
            between single quotes.

    Returns:
        The complete prompt string.
    """
    logger.info(f"Generating input text for word: {word}")
    prompt = f"use the word '{word}' based on the following sentences types, give output in a table format, no quotes around sentences, no repeating the same sentence: Simple Sentence, Compound Sentence"
    return prompt
|
| 85 |
|
| 86 |
# Complex Sentence, Compound-Complex Sentence, Declarative Sentence, Interrogative Sentence, Imperative Sentence, Exclamatory Sentence, Optative Sentence, Parallel Sentences, Balanced Sentences, Loose Sentence, Periodic Sentence, Elliptical Sentence, Inverted Sentence, Chiasmus, Anaphora, Minor Sentence, Run-on Sentence, Sentence Fragments, Conditional Sentence, Cleft Sentence, Existential Sentence, Tautology, Paradoxical Sentence, Formal Sentence, Colloquial Sentence, Poetic Sentence, Humorous or Ironic Sentence, Everyday Sentence, Technical Sentence, Standard Negation (Using 'not' or contractions), Negative Words (Indefinite pronouns/adverbs), Double Negation (Non-standard or emphatic), Affixal Negation (Prefixes like un-, in-, dis-), Implied Negation (No direct negative word), Negative Questions, Negative Imperatives, Negative interrogative, Active Sentence, Full Passive Sentence, Agentless Passive Sentence, Get-Passive (Informal), Passive with Modal Verbs, Impersonal Passive, Interrogative Passive, Negative Passive, Combined Cases: Negative + Passive, Gerundive Sentences, Indicative (States facts)"
|
|
|
|
| 89 |
driver = None
|
| 90 |
generated_sentences = []
|
| 91 |
try:
|
| 92 |
+
logger.info("Starting sentence fetching process...")
|
| 93 |
+
logger.debug(f"Input text: {input_text[:100]}...") # Log first 100 chars
|
| 94 |
+
|
| 95 |
# Set up the WebDriver (e.g., Chrome)
|
| 96 |
# driver = webdriver.Chrome(options=options) # Ensure ChromeDriver is in PATH
|
| 97 |
# driver = webdriver.Firefox(options=options)
|
| 98 |
|
| 99 |
driver = setup_selenium()
|
| 100 |
driver.implicitly_wait(5)
|
| 101 |
+
|
| 102 |
+
logger.info("Navigating to target URL...")
|
| 103 |
# Open a website
|
| 104 |
driver.get("https://copilot.microsoft.com/chat")
|
| 105 |
print(f"...after: driver.get('https://copilot.microsoft.com/chat')")
|