Spaces:

MukeshKapoor25
/

NERTest

Sleeping

File size: 3,348 Bytes

import re
from typing import Optional, Dict, Any

import spacy
from fastapi import FastAPI
from pydantic import BaseModel

# Load the spaCy "en_core_web_sm" pipeline once at import time; the parser
# below reuses this single module-level instance for every sentence.
nlp = spacy.load("en_core_web_sm")

# FastAPI application instance that the route decorator below registers on.
app = FastAPI()

# Keyword lookup tables used when turning a sentence into a search query.
#   "categories": lower-case keyword -> merchant category label.
#   "filters":    lower-case phrase  -> query fields to set when the phrase
#                 appears in the sentence.
# Insertion order is kept deliberately, since consumers iterate these dicts.
KEYWORD_MAPPINGS = {
    "categories": dict(
        [
            ("salon", "Salon"),
            ("spa", "Spa"),
            ("gym", "Fitness"),
            ("fitness", "Fitness"),
            ("pet spa", "Pet Spa"),
            ("dental", "Dental"),
            ("nail art", "Nail Art"),
            ("tattoo", "Tattoo"),
        ]
    ),
    "filters": {
        # Synonyms that all request top-rated results.
        **{
            phrase: {"top_rated": True}
            for phrase in ("top", "top-rated", "highly-rated", "best")
        },
        "popular": {"popular": True},
        "trending": {"trending": True},
        # Proximity phrases that all imply the same search radius.
        **{
            phrase: {"radius": 500}
            for phrase in ("near me", "around me", "nearby")
        },
    },
}

# Pydantic schema for the input query
class QueryRequest(BaseModel):
    """Request body accepted by the ``/parse-query/`` endpoint."""

    # Free-text search sentence to parse (required).
    sentence: str
    # Optional coordinates; each defaults to None when omitted.
    latitude: Optional[float] = None
    longitude: Optional[float] = None

def _phrase_pattern(phrase: str, allow_plural: bool = False) -> "re.Pattern[str]":
    """Compile a whole-word regex for a lower-case keyword or phrase.

    Words inside the phrase may be separated by any run of whitespace, and the
    match must not be embedded in a longer word (so "spa" does not match
    "space"). With ``allow_plural`` a trailing "s"/"es" is tolerated so that
    "salons" still matches the keyword "salon".
    """
    body = r"\s+".join(re.escape(word) for word in phrase.split())
    if allow_plural:
        body += r"(?:e?s)?"
    # Lookarounds instead of \b so phrases containing punctuation
    # (e.g. "top-rated") are still bounded correctly.
    return re.compile(rf"(?<!\w){body}(?!\w)")


def parse_sentence_to_query_ner(sentence: str, lat: Optional[float] = None, lng: Optional[float] = None) -> Dict[str, Any]:
    """
    Parse a sentence using keyword matching plus spaCy NER and build a search query.

    Categories and filters are matched as whole words/phrases against the
    lower-cased sentence, so multi-word keywords such as "pet spa" work and
    substrings of unrelated words ("space", "laptop") are ignored.

    Args:
        sentence (str): Input sentence to parse.
        lat (Optional[float]): Latitude for geolocation; copied into the result.
        lng (Optional[float]): Longitude for geolocation; copied into the result.

    Returns:
        Dict[str, Any]: Parsed search query with the keys ``location_id``
        (always None here), ``latitude``, ``longitude``, ``radius``,
        ``merchant_category``, ``business_name``, ``top_rated``, ``popular``
        and ``trending``.
    """
    # Initialize query with defaults
    query = {
        "location_id": None,
        "latitude": lat,
        "longitude": lng,
        "radius": None,
        "merchant_category": None,
        "business_name": None,
        "top_rated": False,
        "popular": False,
        "trending": False,
    }

    # Process the sentence using spaCy (needed for the entity pass below)
    doc = nlp(sentence)
    text = sentence.lower()

    # Category: when several keywords occur, the one mentioned last wins
    # (this matches the previous token-by-token behaviour); if two matches end
    # at the same position, the longer and more specific one ("pet spa" over
    # "spa") is preferred.
    best_rank = None
    for keyword, category in KEYWORD_MAPPINGS["categories"].items():
        for match in _phrase_pattern(keyword, allow_plural=True).finditer(text):
            rank = (match.end(), match.end() - match.start())
            if best_rank is None or rank > best_rank:
                best_rank = rank
                query["merchant_category"] = category

    # Filters: every phrase present contributes its fields, in table order.
    # NOTE: proximity phrases ("near me", ...) set ``radius`` here whether or
    # not coordinates were supplied. The earlier code had an additional
    # coordinate-guarded assignment of the same value, which was a no-op and
    # has been dropped.
    for phrase, filter_dict in KEYWORD_MAPPINGS["filters"].items():
        if _phrase_pattern(phrase).search(text):
            query.update(filter_dict)

    # Business name: text of the first entity whose label is ORG, PERSON or GPE.
    for ent in doc.ents:
        if ent.label_ in ("ORG", "PERSON", "GPE"):
            query["business_name"] = ent.text
            break

    return query

# POST endpoint exposing the sentence parser
@app.post("/parse-query/")
async def parse_query(request: QueryRequest):
    """
    Parse the submitted sentence and wrap the result in a response envelope.

    Args:
        request (QueryRequest): Sentence plus optional latitude/longitude.

    Returns:
        Dict[str, Any]: ``{"query": <parsed search query>}``.
    """
    result = parse_sentence_to_query_ner(
        request.sentence,
        request.latitude,
        request.longitude,
    )
    response = {"query": result}
    return response