# AU_MERIT_SCRAP / app.py
# hashir672's picture
# Update app.py
# b2225b5 verified
import requests
from bs4 import BeautifulSoup
import pandas as pd
from concurrent.futures import ThreadPoolExecutor, as_completed
from threading import Thread
from flask import Flask, abort, send_file, render_template, request
import sys
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.base import MIMEBase
from email import encoders
def get_ip_and_country(timeout=10):
    """Print this host's public IP address and the country it geolocates to.

    The IP is looked up via api.ipify.org and the country via ipapi.co.
    The output is purely diagnostic, so lookup failures are reported on
    stdout instead of being raised to the caller.

    Args:
        timeout: Seconds to wait for each HTTP request before giving up.
    """
    try:
        ip_response = requests.get(
            'https://api.ipify.org?format=json', timeout=timeout)
        ip_response.raise_for_status()
        ip_address = ip_response.json()['ip']
    except (requests.RequestException, ValueError, KeyError) as exc:
        print(f'Could not determine IP address: {exc}')
        return

    # Fall back to 'Unknown' when the geolocation lookup fails, mirroring the
    # default used when the response has no 'country_name' key.
    country = 'Unknown'
    try:
        geo_response = requests.get(
            f'https://ipapi.co/{ip_address}/json/', timeout=timeout)
        country = geo_response.json().get('country_name', 'Unknown')
    except (requests.RequestException, ValueError) as exc:
        print(f'Could not determine country: {exc}')

    print(f'IP Address: {ip_address}')
    print(f'Country: {country}')
def send_email(subject, body, to_email, file_path):
    """Send a plain-text email through Gmail SMTP, optionally with one attachment.

    Args:
        subject: Subject line of the message.
        body: Plain-text body of the message.
        to_email: Recipient address.
        file_path: Path of a file to attach; a falsy value sends no attachment.

    Raises:
        OSError: If ``file_path`` is given but the file cannot be opened.

    Failures while talking to the SMTP server are not raised; they are
    caught and printed.
    """
    import os  # local import so this function does not depend on import order

    # SECURITY NOTE(review): these credentials are committed in source. The
    # literals are kept only as a backward-compatible fallback; set
    # SMTP_FROM_EMAIL / SMTP_PASSWORD in the environment, rotate the password
    # and then delete the fallbacks.
    from_email = os.environ.get("SMTP_FROM_EMAIL", "hhashirkashif@gmail.com")
    password = os.environ.get("SMTP_PASSWORD", "Hashirisbest#1122")

    # Create message container
    msg = MIMEMultipart()
    msg['From'] = from_email
    msg['To'] = to_email
    msg['Subject'] = subject
    msg.attach(MIMEText(body, 'plain'))

    if file_path:
        part = MIMEBase('application', 'octet-stream')
        with open(file_path, 'rb') as file:
            part.set_payload(file.read())
        encoders.encode_base64(part)
        # Advertise only the base name (not the local path) and let
        # add_header() quote it properly.
        part.add_header(
            'Content-Disposition',
            'attachment',
            filename=os.path.basename(file_path),
        )
        msg.attach(part)

    # Connect to Gmail server and send email. The context manager closes the
    # connection even when starttls/login/sendmail fails.
    try:
        with smtplib.SMTP('smtp.gmail.com', 587, timeout=30) as server:
            server.starttls()
            server.login(from_email, password)
            server.sendmail(from_email, to_email, msg.as_string())
        print("Email sent successfully")
    except Exception as e:  # best-effort delivery: report, never crash the job
        print(f"Failed to send email: {e}")
# Merit-list lookup page: get_form_fields() downloads it and
# fetch_student_results() posts the search form back to it.
URL = "https://portals.au.edu.pk/aumeritlist/ETS_View.aspx"
class Unbuffered:
    """Stream wrapper that flushes the underlying stream after every write.

    Any attribute not defined here is forwarded to the wrapped stream, so the
    wrapper can stand in for it (e.g. as ``sys.stdout``).
    """

    def __init__(self, stream):
        self.stream = stream

    def write(self, data):
        """Write ``data`` to the wrapped stream and flush immediately."""
        self.stream.write(data)
        self.stream.flush()

    def writelines(self, datas):
        """Write every item of ``datas`` to the wrapped stream, then flush."""
        self.stream.writelines(datas)
        self.stream.flush()

    def __getattr__(self, attr):
        # __getattr__ only runs when normal lookup fails. If 'stream' itself
        # is missing (instance created without __init__, e.g. by copy),
        # delegating would look up self.stream again and recurse forever.
        if attr == 'stream':
            raise AttributeError(attr)
        return getattr(self.stream, attr)
# Route stdout through the flushing wrapper so each print() is written out
# immediately instead of sitting in a buffer.
sys.stdout = Unbuffered(sys.stdout)
def empty_file(file_path):
    """Truncate ``file_path`` to zero length (creating it if needed) and log it."""
    # Opening in write mode already discards any previous content.
    with open(file_path, 'w'):
        pass
    print("file emptied")
def get_form_fields(timeout=30):
    """Download the search page and return its hidden form-state fields.

    Args:
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A dict with the values of the ``__VIEWSTATE``,
        ``__VIEWSTATEGENERATOR`` and ``__EVENTVALIDATION`` inputs.

    Raises:
        requests.RequestException: If the page cannot be fetched or the
            server answers with an error status.
        ValueError: If one of the hidden inputs is missing from the page.
    """
    response = requests.get(URL, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    form_data = {}
    for field_id in ('__VIEWSTATE', '__VIEWSTATEGENERATOR',
                     '__EVENTVALIDATION'):
        field = soup.find('input', {'id': field_id})
        # Fail with a readable message instead of subscripting None.
        if field is None or not field.has_attr('value'):
            raise ValueError(
                f"Hidden form field {field_id!r} not found at {URL}")
        form_data[field_id] = field['value']
    return form_data
def fetch_student_results(admit_card_number, form_data, timeout=30):
    """Post the search form for one admit card number and parse the result.

    Args:
        admit_card_number: Admit card number to look up.
        form_data: Hidden form-state values as returned by get_form_fields().
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot block a worker thread indefinitely.

    Returns:
        A dict with the keys 'Admit Card Number', 'Full Name',
        '1st Preference' and 'Aggregate Merit Score', or None when the
        response contains no merit-info section.

    Raises:
        requests.RequestException: On network failure or an error status.
        ValueError: If the merit-info section lacks an expected field.
    """
    headers = {
        "Host": "portals.au.edu.pk",
        "User-Agent":
        "Mozilla/5.0 (X11; Linux x86_64; rv:121.0) Gecko/20100101 Firefox/121.0",
    }
    payload = {
        "__EVENTTARGET": "",
        "__EVENTARGUMENT": "",
        "__VIEWSTATE": form_data['__VIEWSTATE'],
        "__VIEWSTATEGENERATOR": form_data['__VIEWSTATEGENERATOR'],
        "__EVENTVALIDATION": form_data['__EVENTVALIDATION'],
        "ctl00$AUContent$txt_regid": str(admit_card_number),
        "__ASYNCPOST": "true",
        "ctl00$AUContent$btnShow": "Search Result",
    }
    response = requests.post(
        URL, headers=headers, data=payload, timeout=timeout)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'lxml')
    merit_info_div = soup.find('div', {'id': 'AUContent_div_meritinfo'})
    if merit_info_div is None:
        return None

    # Output column -> id of the <span> holding its value.
    span_ids = {
        'Admit Card Number': 'AUContent_lbl_admitcardno',
        'Full Name': 'AUContent_lbl_fullname',
        '1st Preference': 'AUContent_lbl_programname',
        'Aggregate Merit Score': 'AUContent_lbl_meritscore',
    }
    result = {}
    for column, span_id in span_ids.items():
        span = merit_info_div.find('span', {'id': span_id})
        if span is None:
            raise ValueError(
                f"Result for admit card {admit_card_number} "
                f"is missing field {span_id!r}")
        result[column] = span.text.strip()
    return result
def collect_merit_data(start, end, max_workers=50):
    """Look up every admit card number in ``[start, end]`` concurrently.

    Args:
        start: First admit card number (inclusive).
        end: Last admit card number (inclusive).
        max_workers: Size of the thread pool used for the lookups.

    Returns:
        A list of the result dicts returned by fetch_student_results(), in
        completion order. Numbers with no data or whose lookup raised are
        logged and skipped.
    """
    # Nothing to look up: skip the form download and the thread pool.
    if start > end:
        return []

    merit_list = []
    form_data = get_form_fields()
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_admit_card = {
            executor.submit(fetch_student_results, admit_card_number,
                            form_data): admit_card_number
            for admit_card_number in range(start, end + 1)
        }
        for future in as_completed(future_to_admit_card):
            admit_card_number = future_to_admit_card[future]
            try:
                result = future.result()
            except Exception as e:
                # Any failure ends up here (timeouts, parse errors, ...), so
                # the message must not claim a specific HTTP status.
                print(
                    f"Error fetching data for Admit Card Number: {admit_card_number} - {e}"
                )
                continue
            if result:
                merit_list.append(result)
                print(
                    f"Successfully found data for Admit Card Number: {admit_card_number}"
                )
            else:
                print(
                    f"No data found for Admit Card Number: {admit_card_number}"
                )
    return merit_list
import os
def mainTask():
    """Scrape the merit list, save it as CSV, email it, then clean up.

    The status file is always emptied and the temporary CSV files are always
    removed, even when scraping, saving or emailing fails; otherwise a single
    failure would leave the status at "running" and block every later run.
    """
    merit_csv = './merit_list.csv'
    grouped_csv = './grouped_merit_list.csv'
    try:
        merit_data = collect_merit_data(2400015, 2420300)  # Admit cards range defined
        if not merit_data:
            # An empty frame has no '1st Preference' column to group on.
            print("No merit data collected; nothing to send.")
            return

        df = pd.DataFrame(merit_data)
        df.to_csv(merit_csv, index=False)

        # Group rows by 1st preference: a stable sort keeps the original
        # order inside each programme, which is what the previous
        # groupby/apply/reset_index round-trip produced once the index was
        # dropped on export.
        grouped_df = df.sort_values('1st Preference', kind='mergesort')
        grouped_df.to_csv(grouped_csv, index=False)
        print("Saved Successfully.")

        print("now sending file")
        subject = "AU MERIT UNOFFICIAL"
        body = "Au merit List"
        for recipient in ("hhashirkashif@gmail.com",
                          "hashirisbest1@gmail.com"):
            send_email(subject, body, recipient, grouped_csv)
    finally:
        empty_file("./status.txt")
        for leftover in (grouped_csv, merit_csv):
            try:
                os.remove(leftover)
            except FileNotFoundError:
                # The file was never written because the run failed early.
                pass
# Flask application object; the route handlers below are registered on it.
app = Flask(__name__)
@app.route('/')
def root():
    """Respond to requests for the site root with a fixed greeting."""
    greeting = "Hey"
    return greeting
@app.route('/main')
def main():
    """Start the scraping job in a background thread unless one is running.

    Returns:
        A short status message for the caller. When the merit portal cannot
        be reached the job is not started and a 503 response is returned.
    """
    # Purely diagnostic output; it must never prevent the job from starting.
    try:
        get_ip_and_country()
    except Exception as exc:
        print(f"Could not print IP diagnostics: {exc}")

    # Probe the portal first so an unreachable site is reported to the caller
    # instead of surfacing later inside the background thread.
    try:
        probe = requests.get(URL, timeout=30)
    except requests.RequestException as exc:
        print(f"Merit portal unreachable: {exc}")
        return "Merit portal unreachable plz try again later", 503
    print(probe.status_code)

    # Specify the file path
    file_path = "./status.txt"
    if os.path.exists(file_path):
        with open(file_path, 'r') as file:
            content = file.read().strip()
        if content == "running":
            print("file already running")
            return "Program still running plz wait"

    # Mark the job as running on every start path, including the first run
    # when the status file does not exist yet.
    with open(file_path, 'w') as file:
        file.write("running")
    print("file status set to running")

    thr = Thread(target=mainTask)
    thr.start()
    return 'Program started plz wait 20 mins'