File size: 3,493 Bytes
36c3f07
0ebd299
36c3f07
 
 
 
 
 
 
 
0ebd299
36c3f07
 
0ebd299
 
 
 
 
36c3f07
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from gradio_client import Client
import json
import gradio as gr
from webdriver_manager.chrome import ChromeDriverManager

def scrape_data(amazon_url):
    """Scrape an Amazon product's bullet-point description and the matching
    Flipkart listing's description.

    Parameters:
        amazon_url: URL of an Amazon product page.

    Returns:
        Tuple ``(amazon_desc, flipkart_desc)`` of stripped text blocks;
        ``flipkart_desc`` is ``''`` when no description element is found.

    Raises:
        selenium.common.exceptions.TimeoutException: if the Amazon product
            title does not appear within 5 seconds.
    """
    service = Service(ChromeDriverManager().install())
    options = Options()
    options.add_argument("--headless")

    driver = webdriver.Chrome(service=service, options=options)
    try:
        driver.get(amazon_url)

        # Amazon renders the title asynchronously; wait up to 5s for it.
        amazon_title_element = WebDriverWait(driver, 5).until(
            EC.presence_of_element_located((By.ID, 'productTitle'))
        )
        amazon_title = amazon_title_element.text.strip()

        amazon_desc = driver.find_element(By.ID, 'feature-bullets').text.strip()

        # Search Flipkart using the first six words of the Amazon title.
        driver.get(f'https://www.flipkart.com/search?q={"+".join(amazon_title.split()[:6])}')

        # Flipkart result cards use different CSS classes depending on layout;
        # try each in turn and keep the first link whose href contains the
        # first word of the Amazon title.
        first_word = amazon_title.split()[0].lower()
        flipkart_url = ''
        for class_name in ('_2rpwqI', '_1fQZEK'):
            for element in driver.find_elements(By.CLASS_NAME, class_name):
                text_content = element.get_attribute('href')
                if first_word in text_content:
                    flipkart_url = text_content
                    break
            if flipkart_url:
                break

        # Relative hrefs need the site prefix. NOTE(review): when no match was
        # found this navigates to the Flipkart homepage and yields an empty
        # description (original behavior preserved).
        if flipkart_url.find('flipkart.com') == -1:
            flipkart_url = 'https://www.flipkart.com' + flipkart_url

        driver.get(flipkart_url)
        # Keep the text of the LAST matching element, as the original did.
        flipkart_desc = ''
        for element in driver.find_elements(By.CLASS_NAME, '_2418kt'):
            flipkart_desc = element.text

        return amazon_desc, flipkart_desc
    finally:
        # Always close the browser; the original leaked the headless Chrome
        # process whenever any scraping step raised before driver.quit().
        driver.quit()

def compare_descriptions(amazon_url):
    """Compare a product's Amazon and Flipkart descriptions via a hosted
    FLAN-T5 model and report whether they disagree.

    Parameters:
        amazon_url: URL of an Amazon product page.

    Returns:
        JSON string with keys "Amazon", "Flipkart" and "result".
    """
    client = Client("https://atulit23-google-flan-t5.hf.space/")

    print(amazon_url)

    amazon_desc, flipkart_desc = scrape_data(amazon_url)

    # Single prompt containing both descriptions; the model is asked for a
    # yes/no verdict on whether they contain conflicting information.
    description_to_compare = "First description: " + amazon_desc + " " + "Second Description: " + flipkart_desc + " As you can see I have provided you with two descriptions. Compare these two descriptions to see if for the same field some different information has been provided. Answer in yes or no."

    print(description_to_compare)

    result = client.predict(
        description_to_compare,
        api_name="/predict"
    )

    final_result = {"Amazon": amazon_desc, "Flipkart": flipkart_desc, "result": ""}
    # Case-insensitive check: the model may answer "Yes"/"YES"; the original
    # `result.find("yes") != -1` missed those and wrongly reported no mismatch.
    if "yes" in result.lower():
        final_result["result"] = "Mismatch Detected between the descriptions at Amazon & Flipkart."
    else:
        final_result["result"] = "No Mismatch Detected between the descriptions at Amazon & Flipkart."

    return json.dumps(final_result)


# --- Gradio UI wiring (runs at import time) -------------------------------
# A single text box accepts the Amazon product URL and the JSON verdict from
# compare_descriptions is shown in a text box.
# NOTE(review): the label "Topic Name" is misleading — the value is passed to
# compare_descriptions as an Amazon URL; confirm intended wording.
inputs_image_url = [
    gr.Textbox(type="text", label="Topic Name"),
]

outputs_result_dict = [
    gr.Textbox(type="text", label="Result"),
]

interface_image_url = gr.Interface(
    fn=compare_descriptions,
    inputs=inputs_image_url,
    outputs=outputs_result_dict,
    title="Text Generation",
    cache_examples=False,
)

# Wrap the single interface in a tabbed container and start the server.
# NOTE(review): launching at import time means importing this module blocks;
# consider an `if __name__ == "__main__":` guard if it is ever imported.
gr.TabbedInterface(
    [interface_image_url],
    tab_names=['Some inference']
).launch()