Spaces:
Runtime error
Runtime error
| import re | |
| import requests | |
| import gradio as gr | |
| import pandas as pd | |
| from transformers import pipeline | |
| from transformers import AutoTokenizer | |
| from transformers import AutoModelForSequenceClassification | |
def process_tweet(tweet):
    """Normalize raw tweet text before it is passed to the models.

    Removes links and @mentions, collapses runs of whitespace into a single
    space, unwraps hashtags to their bare word, and trims surrounding spaces
    and quote characters.

    Args:
        tweet: Raw tweet text.

    Returns:
        The cleaned tweet text.
    """
    # remove links (both bare "www." links and http/https URLs)
    tweet = re.sub(r'(www\.[^\s]+)|(https?://[^\s]+)', '', tweet)
    # remove usernames
    tweet = re.sub(r'@[^\s]+', '', tweet)
    # remove additional white spaces
    tweet = re.sub(r'\s+', ' ', tweet)
    # replace hashtags with words
    tweet = re.sub(r'#([^\s]+)', r'\1', tweet)
    # trim: removing a leading mention or trailing link leaves a stray space,
    # so strip spaces together with the surrounding quote characters
    return tweet.strip(' \'"')
# Load everything once at import time so each request reuses the same objects.
# The tokenizer and the coordinates model come from the same checkpoint; the
# relevancy model is wrapped in a text-classification pipeline.
tokenizer = AutoTokenizer.from_pretrained("azamat/geocoder_coordinates_model")
relevancy_pipeline = pipeline(
    "sentiment-analysis",
    model="azamat/geocoder_relevancy_model",
)
coordinates_model = AutoModelForSequenceClassification.from_pretrained(
    "azamat/geocoder_coordinates_model"
)
def predict_relevancy(text):
    """Run the relevancy pipeline on *text*.

    Returns:
        A ``(label, score)`` tuple taken from the first pipeline result.
    """
    first_result = relevancy_pipeline(text)[0]
    return first_result['label'], first_result['score']
def predict_coordinates(text):
    """Predict a coordinate pair for *text* with the coordinates model.

    The text is tokenized to a fixed length of 128 tokens (padded/truncated)
    and passed through ``coordinates_model``. The first two values of the
    first output row are returned; the caller treats them as latitude and
    longitude.

    Args:
        text: Pre-processed tweet text.

    Returns:
        A ``(first, second)`` tuple of floats rounded to 3 decimal places.
    """
    # Imported locally so this function brings its own dependency into scope;
    # torch is already required because the tokenizer returns 'pt' tensors.
    import torch

    encoding = tokenizer(
        text,
        padding="max_length",
        truncation=True,
        max_length=128,
        return_tensors='pt',
    )
    # Pure inference: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        outputs = coordinates_model(**encoding)
    prediction = outputs[0][0]
    return round(prediction[0].item(), 3), round(prediction[1].item(), 3)
def reverse_geocode(lat, lon):
    """Look up a human-readable place name for a latitude/longitude pair.

    Sends a GET request to the ``geocode.maps.co`` reverse endpoint and
    returns the ``display_name`` field of the JSON response.

    Args:
        lat: Latitude to look up.
        lon: Longitude to look up.

    Returns:
        The ``display_name`` string from the response, or ``"No data"`` when
        the request fails, times out, returns an HTTP error status, or the
        response does not contain the expected field.
    """
    payload = {
        'lat': lat,
        'lon': lon,
        'zoom': 12,
        'format': 'jsonv2',
        'accept-language': 'en',
    }
    try:
        # A timeout keeps a stalled geocoding server from hanging the request.
        r = requests.get(
            'https://geocode.maps.co/reverse',
            params=payload,
            timeout=10,
        )
        r.raise_for_status()
        return r.json()['display_name']
    except (requests.RequestException, ValueError, KeyError, TypeError):
        # Best-effort lookup: network/HTTP failures, non-JSON bodies, and
        # responses without 'display_name' all degrade to a placeholder.
        return "No data"
def predict(text):
    """Run the full geocoding pipeline on a tweet.

    The tweet is cleaned and scored for relevancy. Only when the label is
    ``'relevant'`` are coordinates predicted and reverse geocoded; otherwise
    the default row (zeros and an empty string) is returned.

    Args:
        text: Raw tweet text.

    Returns:
        A single-row ``pandas.DataFrame`` with the columns
        ``relevancy_score``, ``lat``, ``lon`` and ``reversed lat/lon``.
    """
    cleaned = process_tweet(text)
    row = {"relevancy_score": 0, "lat": 0, "lon": 0, "reversed lat/lon": ""}

    label, score = predict_relevancy(cleaned)
    if label == 'relevant':
        # Score is shown as a percentage with two decimals.
        row['relevancy_score'] = round(score * 100, 2)
        row['lat'], row['lon'] = predict_coordinates(cleaned)
        row['reversed lat/lon'] = reverse_geocode(row['lat'], row['lon'])

    return pd.DataFrame([row])
# Gradio UI: a few Markdown headers describing the pipeline, one textbox for
# the tweet, and a dataframe that shows the prediction when the user submits.
with gr.Blocks() as demo:
    for header in (
        "# **<p align='center'>Twitter geocoding with 🤗 Transformers</p>**",
        "### <div align='left'>Pipeline consists of:</div>",
        "### <div align='left'>1) Relevancy scoring model - predicts whether a tweet has geocoding related information</div>",
        "### <div align='left'>2) Coordinate predicting model - predicts exact latitude and longitude of user by tweet</div>",
        "### <div align='left'>3) Nominatim API for reverse geocoding lat/lon - uses open street map to reverse geocode lat and lon</div>",
    ):
        gr.Markdown(header)

    inputs = gr.Textbox(placeholder="Enter the tweet")
    outputs = [gr.Dataframe(label="Geocoded data")]
    # Pressing Enter in the textbox runs the full prediction pipeline.
    inputs.submit(predict, inputs=inputs, outputs=outputs)

# Start the web server only when executed as a script.
if __name__ == "__main__":
    demo.launch()