# Hugging Face Space app (the scraped page header read "Spaces: Runtime error").
# Standard library
import re
from datetime import datetime

# Third-party
import gradio as gr
import pandas as pd
import requests
from bs4 import BeautifulSoup
from transformers import pipeline
# Knowledge base: each entry pairs a benefit with one question category
# ("coreName") and the GOV.UK page(s) that answer it. When several pages
# apply, the "link" value is a comma-separated list of URLs.
benefits = [
    {"benefitName": "Universal Credit", "coreName": "what is this benefit",
     "link": "https://www.gov.uk/universal-credit/"},
    {"benefitName": "Universal Credit", "coreName": "who can apply",
     "link": "https://www.gov.uk/universal-credit/eligibility"},
    {"benefitName": "Universal Credit", "coreName": "how much can I get",
     "link": "https://www.gov.uk/universal-credit/what-youll-get,https://www.gov.uk/universal-credit/how-youre-paid"},
    {"benefitName": "Universal Credit", "coreName": "How to apply",
     "link": "https://www.gov.uk/universal-credit/how-to-claim"},
]
def requestPage(link):
    """Fetch *link* over HTTP and return the parsed BeautifulSoup document.

    Raises:
        requests.HTTPError: on a non-2xx response, so a failed download
            surfaces immediately instead of silently yielding an empty page.
        requests.Timeout: if the server does not respond within 30 seconds
            (the original call had no timeout and could hang forever).
    """
    page = requests.get(link, timeout=30)
    page.raise_for_status()
    return BeautifulSoup(page.content, "html.parser")
def scrapeTable(table):
    """Flatten an HTML <table> tag into plain text.

    Produces one line per <tbody> row, with every cell rendered as
    "<column header>: <cell text>" so the QA model keeps the association
    between a value and its column.

    Args:
        table: a BeautifulSoup Tag for a <table> element that has both
            <thead> and <tbody> sections (GOV.UK rate tables do).

    Returns:
        str: newline-terminated text, one line per body row.
    """
    # Column labels come from the header row; they prefix each cell below.
    columns = [col.text.strip() for col in table.thead.tr.find_all()]
    # Removed a stray no-op expression statement (`columns`) and a
    # commented-out debug print left over from notebook development.
    clean_rows = ""
    for row in table.tbody.find_all(recursive=False):
        elements = ["{}: {}".format(columns[index], element.text.strip())
                    for index, element in enumerate(row.find_all(recursive=False))]
        clean_rows += " ".join(elements) + "\n"
    return clean_rows
def scrapePage(page):
    """Extract the readable text of one GOV.UK guide page.

    Walks the guide body fragment by fragment: bullet lists are collapsed
    onto a single line with "{;}" separators, tables are flattened through
    scrapeTable, and everything else is kept as stripped text.

    Args:
        page: BeautifulSoup document as returned by requestPage.

    Returns:
        str: the page title followed by the body text, newline-separated.
    """
    # The guide content sits inside the div with id "guide-contents".
    content = page.find('div', {"id": "guide-contents"})
    title = content.find('h1', {"class": "part-title"}).text.strip()
    print(title)

    corpus = title + "\n\n"
    body = content.find('div', {"class": "gem-c-govspeak"})
    for frag in body.find_all(recursive=False):
        text = frag.text.strip()
        if frag.name == 'ul':
            # One "{;}"-delimited line instead of one bullet per line.
            corpus += "{;}" + re.sub(r'\n+', "{;}", text)
        elif frag.name == 'table':
            corpus += scrapeTable(frag)
        else:
            corpus += text
        corpus += "\n"
    return corpus
# Download and cache the context text for every benefit/question entry.
# Runs once at startup; each entry gains 'context' (the scraped text of all
# its linked pages concatenated) and 'contextLen' (its length in characters).
for benefit in benefits:
    links = benefit['link'].split(',')
    print(benefit['benefitName'], benefit['coreName'], len(links))
    context = ""
    for link in links:
        context += scrapePage(requestPage(link))
    benefit['context'] = context
    benefit['contextLen'] = len(context)
    print("--------------------------------")

# Distinct benefit names and question categories (set order is arbitrary).
# Replaced list(set(list(map(...)))) with set comprehensions and removed the
# bare `benefitsClasses, core4Classes` expression — a no-op outside a notebook.
benefitsClasses = list({b['benefitName'] for b in benefits})
core4Classes = list({b['coreName'] for b in benefits})
# Extractive question-answering pipeline (transformers' default QA model).
question_answerer = pipeline("question-answering")

# Question category this demo answers; classification is not yet wired up.
coreName = 'how much can I get'
def testQA(question):
    """Answer *question* against the pre-scraped Universal Credit context.

    The benefit and question-category predictions are hard-coded for now
    (hence "test"); only the extractive QA step is live.

    Args:
        question (str): free-text question from the Gradio textbox.

    Returns:
        str: the answer span extracted by the QA pipeline.
    """
    predictedBenefit = "Universal Credit"
    predictedCore = coreName
    # Removed dead timing code (`time` / `time3` were computed but never
    # used). next() takes the first matching entry without materialising
    # the whole filtered list.
    entry = next(b for b in benefits
                 if b['benefitName'] == predictedBenefit
                 and b['coreName'] == predictedCore)
    return question_answerer(question=question, context=entry['context'])['answer']
# Gradio UI: one text box in, the extracted answer text out.
iface = gr.Interface(fn=testQA, inputs="text", outputs="text")
iface.launch()