File size: 2,542 Bytes
9142902
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import re

# States recognised by the parser, in the order they are reported.
_STATES = (
    "andaman and nicobar islands", "andhra pradesh", "bihar", "jharkhand",
    "odisha", "tamil nadu", "rajasthan", "uttar pradesh", "west bengal",
    "kerala", "karnataka", "maharashtra",
)

# Crops recognised by the parser; the first one (in this order) found in the
# query wins.
_CROPS = (
    "rice", "maize", "wheat", "sugarcane", "turmeric", "banana", "groundnut",
    "arecanut", "sunflower", "moong", "urad", "black pepper", "cashewnut",
)

# Ordered (keywords, query_type) rules; the first rule with a keyword present
# in the query decides the query type.
_QUERY_TYPE_RULES = (
    (("compare",), "compare_rainfall_production"),
    (("trend",), "crop_trend"),
    (("highest",), "highest_production"),
    (("policy", "promote"), "policy_support"),
)


def _term_pattern(term, *, allow_plural=False):
    """Compile a whole-word pattern for *term*, optionally accepting a plural 's'."""
    suffix = "s?" if allow_plural else ""
    return re.compile(rf"\b{re.escape(term)}{suffix}\b")


# Whole-word matching keeps "price" from being read as the crop "rice".
_STATE_PATTERNS = tuple((name, _term_pattern(name)) for name in _STATES)
_CROP_PATTERNS = tuple(
    (name, _term_pattern(name, allow_plural=True)) for name in _CROPS
)
_YEARS_PATTERN = re.compile(r"\blast (\d+) years?")


def parse_query(user_input: str) -> dict:
    """
    Project Samarth query parser.

    Convert a natural-language question into a structured query.

    The input is lower-cased and runs of whitespace are collapsed to a single
    space before matching, so "Tamil   Nadu" is still recognised. ``None`` or
    an empty string yields the defaults.

    Returns a dict with the keys:
        states:      list of known state names found as whole words, in the
                     order of the internal state list (empty if none).
        crop:        the first known crop found as a whole word (a trailing
                     plural "s" is accepted), or None.
        years:       N from a "last N year(s)" phrase, otherwise 5.
        metrics:     "rainfall" and/or "production" when mentioned; both
                     when neither is mentioned.
        query_type:  "compare_rainfall_production", "crop_trend",
                     "highest_production", "policy_support" or "general",
                     decided by the first matching keyword rule.
    """
    # Normalise case and whitespace so multi-word names match reliably.
    query = " ".join((user_input or "").lower().split())

    result = {
        "states": [],
        "crop": None,
        "years": 5,  # Default window when the query does not specify one
        "metrics": [],
        "query_type": "general",
    }

    # 1. Number of years ("last 10 years", "last 1 year")
    years_match = _YEARS_PATTERN.search(query)
    if years_match:
        result["years"] = int(years_match.group(1))

    # 2. States: every known state mentioned in the query
    result["states"] = [
        name for name, pattern in _STATE_PATTERNS if pattern.search(query)
    ]

    # 3. Crop: first known crop mentioned in the query
    result["crop"] = next(
        (name for name, pattern in _CROP_PATTERNS if pattern.search(query)),
        None,
    )

    # 4. Metrics: fall back to both when none is named explicitly
    mentioned = [m for m in ("rainfall", "production") if m in query]
    result["metrics"] = mentioned or ["rainfall", "production"]

    # 5. Query type: first matching rule wins, otherwise stays "general"
    for keywords, query_type in _QUERY_TYPE_RULES:
        if any(keyword in query for keyword in keywords):
            result["query_type"] = query_type
            break

    return result


# 🧪 Quick test
if __name__ == "__main__":
    # Manual smoke test: run a few representative questions through the
    # parser and print the structured result for each one.
    sample_queries = (
        "Compare rainfall and rice production in Andaman and Nicobar Islands for the last 5 years",
        "Show rainfall trend for Rice in Andhra Pradesh for the last 10 years",
        "Which district had highest rice production in Andhra Pradesh?",
        "Suggest policy to promote drought-resistant crops in Odisha",
    )
    for sample in sample_queries:
        print(f"\n🔍 Query: {sample}")
        parsed = parse_query(sample)
        print("Parsed Output:", parsed)