Spaces: Build error

Commit 68e7a9e · Parent(s): 74c7e34
fixed selenium

Files changed:
- Dockerfile +34 -11
- app.py +104 -131
- main.py +247 -0
- packages.txt +2 -0
- requirements.txt +7 -6
Dockerfile
CHANGED
@@ -1,16 +1,39 @@
-# you will also find guides on how best to write your Dockerfile
+FROM python:3.11
 
+# Install system dependencies for Chrome
+RUN apt-get update && apt-get install -y \
+    wget \
+    gnupg \
+    unzip \
+    curl \
+    && rm -rf /var/lib/apt/lists/*
 
+# Install Chrome
+RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - \
+    && echo "deb http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google.list \
+    && apt-get update \
+    && apt-get install -y google-chrome-stable \
+    && rm -rf /var/lib/apt/lists/*
 
+# Install ChromeDriver
+RUN CHROME_DRIVER_VERSION=`curl -sS chromedriver.storage.googleapis.com/LATEST_RELEASE` \
+    && wget -O /tmp/chromedriver.zip http://chromedriver.storage.googleapis.com/$CHROME_DRIVER_VERSION/chromedriver_linux64.zip \
+    && unzip /tmp/chromedriver.zip chromedriver -d /usr/local/bin/ \
+    && rm /tmp/chromedriver.zip \
+    && chmod +x /usr/local/bin/chromedriver
 
+# Set up the working directory
+WORKDIR /code
 
+# Copy requirements and install Python dependencies
+COPY ./requirements.txt /code/requirements.txt
+RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+# Copy the application
+COPY . /code/
+
+# Expose port
+EXPOSE 7860
+
+# Command to run the application
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
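A note on the ChromeDriver step: the chromedriver.storage.googleapis.com/LATEST_RELEASE endpoint only serves driver versions up to Chrome 114 (newer drivers moved to the Chrome for Testing distribution), so pairing it with the current google-chrome-stable can produce a browser/driver version mismatch — a plausible source of the build error above. Since requirements.txt pins selenium==4.15.0, one option is to drop the manual ChromeDriver install entirely and let Selenium Manager (bundled with Selenium 4.6+) resolve a matching driver at runtime. A minimal sketch, assuming Chrome itself is installed in the image:

# Minimal sketch: Selenium Manager (Selenium >= 4.6) downloads a chromedriver
# matching the installed Chrome, so no manual driver install is required.
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

options = Options()
options.add_argument("--headless=new")           # headless mode for containers
options.add_argument("--no-sandbox")             # needed when running as root
options.add_argument("--disable-dev-shm-usage")

driver = webdriver.Chrome(options=options)       # driver resolved automatically
try:
    driver.get("https://example.com")
    print(driver.title)
finally:
    driver.quit()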
app.py
CHANGED
@@ -8,12 +8,6 @@ import os
 from bs4 import BeautifulSoup
 import logging
 import re
-from selenium import webdriver
-from selenium.webdriver.common.by import By
-from selenium.webdriver.support.ui import WebDriverWait
-from selenium.webdriver.support import expected_conditions as EC
-from selenium.webdriver.chrome.options import Options
-import time
 
 # Configure logging
 logging.basicConfig(level=logging.INFO)
@@ -44,7 +38,7 @@ def call_llm(messages: List[dict], max_tokens: int = 150) -> str:
             "messages": messages,
             "model": "gpt-5-nano",
             "max_tokens": max_tokens,
-            "temperature": 0.1
+            "temperature": 0.1
         }
 
         response = requests.post(LLM_URL, headers=headers, json=data)
@@ -57,162 +51,164 @@ def call_llm(messages: List[dict], max_tokens: int = 150) -> str:
         logger.error(f"LLM API call failed: {e}")
         return ""
 
-def setup_selenium_driver():
-    """Setup selenium driver with headless chrome"""
-    chrome_options = Options()
-    chrome_options.add_argument("--headless")
-    chrome_options.add_argument("--no-sandbox")
-    chrome_options.add_argument("--disable-dev-shm-usage")
-    chrome_options.add_argument("--disable-gpu")
-    chrome_options.add_argument("--window-size=1920,1080")
-
-    try:
-        driver = webdriver.Chrome(options=chrome_options)
-        return driver
-    except Exception as e:
-        logger.error(f"Failed to setup selenium driver: {e}")
-        return None
-
 def extract_hidden_elements(html_content: str) -> List[str]:
     """Extract hidden elements from HTML"""
     soup = BeautifulSoup(html_content, 'html.parser')
     hidden_elements = []
 
-    # Look for hidden inputs
+    # Look for hidden inputs
     hidden_inputs = soup.find_all('input', {'type': 'hidden'})
     for inp in hidden_inputs:
         if inp.get('value'):
             hidden_elements.append(f"Hidden input: {inp.get('name', 'unnamed')} = {inp.get('value')}")
 
     # Look for HTML comments
-    comments = soup.find_all(string=lambda text: isinstance(text, str) and '<!--' in text)
+    comments = soup.find_all(string=lambda text: isinstance(text, str) and text.strip().startswith('<!--'))
     for comment in comments:
+        clean_comment = comment.strip().replace('<!--', '').replace('-->', '').strip()
+        if clean_comment:
+            hidden_elements.append(f"Comment: {clean_comment}")
 
+    # Look for elements with display:none
     hidden_divs = soup.find_all(attrs={'style': re.compile(r'display\s*:\s*none', re.I)})
     for div in hidden_divs:
+        text = div.get_text(strip=True)
+        if text:
+            hidden_elements.append(f"Hidden element: {text}")
 
-    # Look for data attributes
+    # Look for data attributes
+    elements_with_data = soup.find_all(attrs=lambda x: x and any(key.startswith('data-') for key in x.keys()))
     for elem in elements_with_data:
+        for attr, value in elem.attrs.items():
+            if attr.startswith('data-') and value:
+                hidden_elements.append(f"Data attribute {attr}: {value}")
 
     return hidden_elements
 
+def advanced_scrape(url: str) -> dict:
+    """Enhanced scraping with better hidden element detection"""
     try:
+        session = requests.Session()
+        session.headers.update({
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
+            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
+            'Accept-Language': 'en-US,en;q=0.5',
+            'Accept-Encoding': 'gzip, deflate',
+            'Connection': 'keep-alive'
+        })
+
+        response = session.get(url, timeout=30)
         response.raise_for_status()
 
         soup = BeautifulSoup(response.text, 'html.parser')
 
+        # Extract comprehensive information
         title = soup.find('title')
-        title_text = title.get_text() if title else "No title"
+        title_text = title.get_text().strip() if title else "No title"
 
+        # Get all text content
         visible_text = soup.get_text(separator=' ', strip=True)
 
         # Extract hidden elements
         hidden_elements = extract_hidden_elements(response.text)
 
+        # Look for scripts that might contain data
+        scripts = soup.find_all('script')
+        script_data = []
+        for script in scripts:
+            if script.string:
+                script_content = script.string.strip()
+                if any(keyword in script_content.lower() for keyword in ['challenge', 'code', 'answer', 'hidden']):
+                    script_data.append(f"Script data: {script_content[:200]}")
+
+        # Look for meta tags
+        meta_data = []
+        meta_tags = soup.find_all('meta')
+        for meta in meta_tags:
+            if meta.get('content'):
+                meta_data.append(f"Meta {meta.get('name', 'unknown')}: {meta.get('content')}")
+
         return {
             'title': title_text,
-            'visible_text': visible_text[:2000],
+            'visible_text': visible_text[:2000],
             'hidden_elements': hidden_elements,
+            'script_data': script_data,
+            'meta_data': meta_data[:5],  # Limit meta data
             'html': response.text
         }
 
     except Exception as e:
+        logger.error(f"Advanced scraping failed for {url}: {e}")
         return {}
 
-def scrape_with_selenium(url: str) -> dict:
-    driver = setup_selenium_driver()
-    if not driver:
-        return {}
-
-    try:
-        driver.get(url)
-        html_content = driver.page_source
-        soup = BeautifulSoup(html_content, 'html.parser')
-
-        # Extract basic info
-        title = driver.title
-        visible_text = soup.get_text(separator=' ', strip=True)
-
-        return {
-            'clickable_elements': clickable_elements,
-            'html': html_content
-        }
-
-    except Exception as e:
-        logger.error(f"Selenium scraping failed for {url}: {e}")
-        return {}
-    finally:
-        if driver:
-            driver.quit()
-
-def analyze_page_content(content: dict, question: str) -> str:
-    """Use LLM to analyze page content and answer questions"""
-    if not content:
-        return "Unable to access page content"
-
-    # Prepare context for LLM (keep it concise to save tokens)
+def analyze_content_intelligently(content: dict, question: str) -> str:
+    """Intelligent content analysis with multiple strategies"""
+    if not content:
+        return "Unable to access page content"
+
+    # Strategy 1: Direct pattern matching for common questions
+    if "challenge name" in question.lower():
+        # Look in title first
+        if content.get('title') and content['title'] != "No title":
+            return content['title']
+
+        # Look in hidden elements
+        for element in content.get('hidden_elements', []):
+            if 'challenge' in element.lower():
+                parts = element.split(':')
+                if len(parts) > 1:
+                    return parts[-1].strip().strip('"').strip("'")
+
+        # Look in visible text for patterns
+        visible = content.get('visible_text', '')
+        challenge_patterns = [
+            r'challenge[:\s]+([^.\n]+)',
+            r'name[:\s]+([^.\n]+)',
+            r'title[:\s]+([^.\n]+)'
+        ]
+
+        for pattern in challenge_patterns:
+            match = re.search(pattern, visible, re.IGNORECASE)
+            if match:
+                return match.group(1).strip()
+
+    # Strategy 2: Use LLM for complex analysis
     context_parts = []
 
     if content.get('title'):
+        context_parts.append(f"Title: {content['title']}")
 
     if content.get('visible_text'):
+        context_parts.append(f"Text: {content['visible_text'][:800]}")
 
     if content.get('hidden_elements'):
+        context_parts.append(f"Hidden: {'; '.join(content['hidden_elements'][:3])}")
 
+    if content.get('script_data'):
+        context_parts.append(f"Scripts: {'; '.join(content['script_data'][:2])}")
 
     context = "\n".join(context_parts)
 
     messages = [
         {
             "role": "system",
+            "content": "Extract the specific answer from webpage content. Be direct and concise. Focus on challenge names, codes, or specific elements requested."
         },
         {
             "role": "user",
+            "content": f"Question: {question}\n\nContent:\n{context}\n\nAnswer:"
         }
     ]
 
+    llm_answer = call_llm(messages, max_tokens=50)
+
+    # Strategy 3: Fallback to first meaningful hidden element
+    if not llm_answer or len(llm_answer.strip()) < 3:
+        for element in content.get('hidden_elements', []):
+            if len(element.split(':')) > 1:
+                return element.split(':')[-1].strip()
+
+    return llm_answer.strip() if llm_answer else "Information not found"
 
 @app.post("/challenge", response_model=ChallengeResponse)
 async def solve_challenge(request: ChallengeRequest):
@@ -220,42 +216,20 @@ async def solve_challenge(request: ChallengeRequest):
     logger.info(f"Received challenge request - URL: {request.url}")
     logger.info(f"Questions: {request.questions}")
 
-    print("URL:", request.url)
     answers = []
 
     try:
        for question in request.questions:
             logger.info(f"Processing question: {question}")
 
-            # If requests fails or doesn't find enough info, try selenium
-            if not page_content or (not page_content.get('hidden_elements') and "hidden" in question.lower()):
-                logger.info("Trying selenium for dynamic content...")
-                page_content = scrape_with_selenium(request.url)
-
-            # If no clear answer, try to extract from hidden elements directly
-            if not answer or len(answer.strip()) < 3:
-                if page_content.get('hidden_elements'):
-                    # Look for challenge-related terms
-                    for element in page_content['hidden_elements']:
-                        if any(term in element.lower() for term in ['challenge', 'name', 'code', 'hidden']):
-                            answer = element.split(':')[-1].strip()
-                            break
-
-            if not answer and "challenge name" in question.lower():
-                # Extract from title or visible text
-                if page_content.get('title'):
-                    answer = page_content['title']
-
-            print("Answers: ", answer)
+            # Scrape the page
+            page_content = advanced_scrape(request.url)
 
-            answers.append(answer)
-            logger.info(f"Answer found: {answer}")
+            # Analyze and get answer
+            answer = analyze_content_intelligently(page_content, question)
+
+            answers.append(answer)
+            logger.info(f"Answer found: {answer}")
 
     except Exception as e:
         logger.error(f"Error processing challenge: {e}")
@@ -265,20 +239,19 @@ async def solve_challenge(request: ChallengeRequest):
 
 @app.get("/health")
 async def health_check():
-    return {"status": "healthy", "message": "HackRx Mission API is running"}
+    return {"status": "healthy", "selenium_available": False}
 
 @app.get("/")
 async def root():
-    """Root endpoint with API information"""
     return {
         "message": "HackRx Mission API - Ready for action!",
+        "mode": "requests-only",
         "endpoints": {
-            "challenge": "/challenge (POST)",
-            "health": "/health (GET)"
+            "challenge": "/challenge (POST)",
+            "health": "/health (GET)"
         }
     }
 
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=8000)
+    uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", 8000)))
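For reference, a minimal client call against the updated /challenge endpoint. The base URL assumes a local `python main.py` run on the default port 8000 (the Docker image serves on 7860 instead), and the target URL is a hypothetical placeholder:

# Minimal sketch of a client call against the /challenge endpoint.
import requests

payload = {
    "url": "https://example.com/challenge-page",   # hypothetical challenge page
    "questions": ["What is the challenge name?"],
}

resp = requests.post("http://localhost:8000/challenge", json=payload, timeout=60)
resp.raise_for_status()
print(resp.json())  # expected shape: {"answers": ["..."]}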
main.py
ADDED
@@ -0,0 +1,247 @@
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+from typing import List
+import requests
+import base64
+import json
+import os
+from bs4 import BeautifulSoup
+import logging
+import re
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+app = FastAPI(title="HackRx Mission API", version="1.0.0")
+
+class ChallengeRequest(BaseModel):
+    url: str
+    questions: List[str]
+
+class ChallengeResponse(BaseModel):
+    answers: List[str]
+
+LLM_URL = "https://register.hackrx.in/llm/openai"
+SUBSCRIPTION_KEY = os.getenv("SUBSCRIPTION_KEY", "sk-****")
+
+def call_llm(messages: List[dict], max_tokens: int = 150) -> str:
+    """Call the LLM API with token optimization"""
+    try:
+        headers = {
+            'Content-Type': 'application/json',
+            'x-subscription-key': SUBSCRIPTION_KEY
+        }
+
+        data = {
+            "messages": messages,
+            "model": "gpt-5-nano",
+            "max_tokens": max_tokens,
+            "temperature": 0.1
+        }
+
+        response = requests.post(LLM_URL, headers=headers, json=data)
+        response.raise_for_status()
+
+        result = response.json()
+        return result.get('choices', [{}])[0].get('message', {}).get('content', '')
+
+    except Exception as e:
+        logger.error(f"LLM API call failed: {e}")
+        return ""
+
+def extract_hidden_elements(html_content: str) -> List[str]:
+    """Extract hidden elements from HTML"""
+    soup = BeautifulSoup(html_content, 'html.parser')
+    hidden_elements = []
+
+    hidden_inputs = soup.find_all('input', {'type': 'hidden'})
+    for inp in hidden_inputs:
+        if inp.get('value'):
+            hidden_elements.append(f"Hidden input: {inp.get('name', 'unnamed')} = {inp.get('value')}")
+
+    comments = soup.find_all(string=lambda text: isinstance(text, str) and text.strip().startswith('<!--'))
+    for comment in comments:
+        clean_comment = comment.strip().replace('<!--', '').replace('-->', '').strip()
+        if clean_comment:
+            hidden_elements.append(f"Comment: {clean_comment}")
+
+    hidden_divs = soup.find_all(attrs={'style': re.compile(r'display\s*:\s*none', re.I)})
+    for div in hidden_divs:
+        text = div.get_text(strip=True)
+        if text:
+            hidden_elements.append(f"Hidden element: {text}")
+
+    elements_with_data = soup.find_all(attrs=lambda x: x and any(key.startswith('data-') for key in x.keys()))
+    for elem in elements_with_data:
+        for attr, value in elem.attrs.items():
+            if attr.startswith('data-') and value:
+                hidden_elements.append(f"Data attribute {attr}: {value}")
+
+    return hidden_elements
+
+def advanced_scrape(url: str) -> dict:
+    """Enhanced scraping with better hidden element detection"""
+    try:
+        session = requests.Session()
+        session.headers.update({
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
+            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
+            'Accept-Language': 'en-US,en;q=0.5',
+            'Accept-Encoding': 'gzip, deflate',
+            'Connection': 'keep-alive'
+        })
+
+        response = session.get(url, timeout=30)
+        response.raise_for_status()
+
+        soup = BeautifulSoup(response.text, 'html.parser')
+
+        title = soup.find('title')
+        title_text = title.get_text().strip() if title else "No title"
+
+        visible_text = soup.get_text(separator=' ', strip=True)
+
+        hidden_elements = extract_hidden_elements(response.text)
+
+        scripts = soup.find_all('script')
+        script_data = []
+        for script in scripts:
+            if script.string:
+                script_content = script.string.strip()
+                if any(keyword in script_content.lower() for keyword in ['challenge', 'code', 'answer', 'hidden']):
+                    script_data.append(f"Script data: {script_content[:200]}")
+
+        # Look for meta tags
+        meta_data = []
+        meta_tags = soup.find_all('meta')
+        for meta in meta_tags:
+            if meta.get('content'):
+                meta_data.append(f"Meta {meta.get('name', 'unknown')}: {meta.get('content')}")
+
+        return {
+            'title': title_text,
+            'visible_text': visible_text[:2000],
+            'hidden_elements': hidden_elements,
+            'script_data': script_data,
+            'meta_data': meta_data[:5],  # Limit meta data
+            'html': response.text
+        }
+
+    except Exception as e:
+        logger.error(f"Advanced scraping failed for {url}: {e}")
+        return {}
+
+def analyze_content_intelligently(content: dict, question: str) -> str:
+    """Intelligent content analysis with multiple strategies"""
+    if not content:
+        return "Unable to access page content"
+
+    # Strategy 1: Direct pattern matching for common questions
+    if "challenge name" in question.lower():
+        # Look in title first
+        if content.get('title') and content['title'] != "No title":
+            return content['title']
+
+        # Look in hidden elements
+        for element in content.get('hidden_elements', []):
+            if 'challenge' in element.lower():
+                parts = element.split(':')
+                if len(parts) > 1:
+                    return parts[-1].strip().strip('"').strip("'")
+
+        # Look in visible text for patterns
+        visible = content.get('visible_text', '')
+        challenge_patterns = [
+            r'challenge[:\s]+([^.\n]+)',
+            r'name[:\s]+([^.\n]+)',
+            r'title[:\s]+([^.\n]+)'
+        ]
+
+        for pattern in challenge_patterns:
+            match = re.search(pattern, visible, re.IGNORECASE)
+            if match:
+                return match.group(1).strip()
+
+    # Strategy 2: Use LLM for complex analysis
+    context_parts = []
+
+    if content.get('title'):
+        context_parts.append(f"Title: {content['title']}")
+
+    if content.get('visible_text'):
+        context_parts.append(f"Text: {content['visible_text'][:800]}")
+
+    if content.get('hidden_elements'):
+        context_parts.append(f"Hidden: {'; '.join(content['hidden_elements'][:3])}")
+
+    if content.get('script_data'):
+        context_parts.append(f"Scripts: {'; '.join(content['script_data'][:2])}")
+
+    context = "\n".join(context_parts)
+
+    messages = [
+        {
+            "role": "system",
+            "content": "Extract the specific answer from webpage content. Be direct and concise. Focus on challenge names, codes, or specific elements requested."
+        },
+        {
+            "role": "user",
+            "content": f"Question: {question}\n\nContent:\n{context}\n\nAnswer:"
+        }
+    ]
+
+    llm_answer = call_llm(messages, max_tokens=50)
+
+    # Strategy 3: Fallback to first meaningful hidden element
+    if not llm_answer or len(llm_answer.strip()) < 3:
+        for element in content.get('hidden_elements', []):
+            if len(element.split(':')) > 1:
+                return element.split(':')[-1].strip()
+
+    return llm_answer.strip() if llm_answer else "Information not found"
+
+@app.post("/challenge", response_model=ChallengeResponse)
+async def solve_challenge(request: ChallengeRequest):
+    """Main endpoint to solve HackRx challenges"""
+    logger.info(f"Received challenge request - URL: {request.url}")
+    logger.info(f"Questions: {request.questions}")
+
+    answers = []
+
+    try:
+        for question in request.questions:
+            logger.info(f"Processing question: {question}")
+
+            # Scrape the page
+            page_content = advanced_scrape(request.url)
+
+            # Analyze and get answer
+            answer = analyze_content_intelligently(page_content, question)
+
+            answers.append(answer)
+            logger.info(f"Answer found: {answer}")
+
+    except Exception as e:
+        logger.error(f"Error processing challenge: {e}")
+        raise HTTPException(status_code=500, detail=f"Challenge processing failed: {str(e)}")
+
+    return ChallengeResponse(answers=answers)
+
+@app.get("/health")
+async def health_check():
+    return {"status": "healthy", "selenium_available": False}
+
+@app.get("/")
+async def root():
+    return {
+        "message": "HackRx Mission API - Ready for action!",
+        "mode": "requests-only",
+        "endpoints": {
+            "challenge": "/challenge (POST)",
+            "health": "/health (GET)"
+        }
+    }
+
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", 8000)))
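One caveat in extract_hidden_elements: html.parser surfaces HTML comments as bs4 Comment nodes whose string value excludes the <!-- and --> delimiters, so the startswith('<!--') filter will generally match nothing. A sketch of a type-based lookup that does catch them (extract_comments is an illustrative name, not part of the commit):

# Sketch: match comments by node type rather than by a '<!--' prefix;
# BeautifulSoup strips the <!-- --> delimiters from a Comment's string value.
from bs4 import BeautifulSoup, Comment

def extract_comments(html_content: str) -> list:
    soup = BeautifulSoup(html_content, 'html.parser')
    comments = soup.find_all(string=lambda text: isinstance(text, Comment))
    return [c.strip() for c in comments if c.strip()]

print(extract_comments("<html><!-- secret code: HX42 --><body>hi</body></html>"))
# ['secret code: HX42']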
packages.txt
ADDED
@@ -0,0 +1,2 @@
+chromium
+chromium-driver
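On Hugging Face Spaces, packages.txt is the apt-package list used by the Gradio/Streamlit SDKs; a Docker Space installs system packages through its Dockerfile instead, so this file may be a leftover from a non-Docker setup. If the Debian chromium/chromium-driver pair is the intended browser, Selenium would be pointed at it roughly like this (the /usr/bin paths are the usual Debian locations, but worth verifying in the actual image):

# Sketch: wiring Selenium to the Debian-packaged browser and driver.
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service

options = Options()
options.binary_location = "/usr/bin/chromium"  # assumed Debian install path
options.add_argument("--headless=new")
options.add_argument("--no-sandbox")

driver = webdriver.Chrome(service=Service("/usr/bin/chromedriver"), options=options)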
requirements.txt
CHANGED
@@ -1,6 +1,7 @@
-fastapi
-uvicorn
-requests
-beautifulsoup4
-
-
+fastapi==0.104.1
+uvicorn[standard]==0.24.0
+requests==2.31.0
+beautifulsoup4==4.12.2
+pydantic==2.5.0
+selenium==4.15.0
+python-multipart==0.0.6