Spaces:
Runtime error
Runtime error
LC
Browse files
app.py
CHANGED
|
@@ -1,12 +1,12 @@
|
|
| 1 |
import numpy as np
|
| 2 |
import pickle
|
| 3 |
import pandas as pd
|
| 4 |
-
import requests
|
| 5 |
-
from selenium import webdriver
|
| 6 |
import matplotlib.pyplot as plt
|
| 7 |
#Simple assignment
|
| 8 |
-
from selenium.webdriver import Firefox
|
| 9 |
-
from selenium.webdriver.common.keys import Keys
|
| 10 |
from selenium.common.exceptions import NoSuchElementException
|
| 11 |
import requests
|
| 12 |
import os
|
|
@@ -20,73 +20,73 @@ import streamlit as st
|
|
| 20 |
### Scrape the COSMIC ID information
|
| 21 |
# ### FRAMEWORKS NEEDED
|
| 22 |
|
| 23 |
-
def scrap():
|
| 24 |
-
#### Setting options to the driver
|
| 25 |
-
options = webdriver.FirefoxOptions()
|
| 26 |
-
options.add_argument('--headless')
|
| 27 |
-
options.add_argument('--no-sandbox')
|
| 28 |
-
options.add_argument('--disable-dev-shm-usage')
|
| 29 |
-
options.capabilities
|
| 30 |
-
### Setting options of webdriver
|
| 31 |
-
# a) Setting the geckodriver (Firefox driver)
|
| 32 |
-
browser = Firefox(options=options,executable_path=r"C:\Users\Pablo\OneDrive\Documents\Documentos\Escuela Politécnica Superior Leganés\4 AÑO\ASIGNATURAS\1 CUATRI\WEB ANALYTICS\PART 2\Milestone3\geckodriver.exe")
|
| 33 |
-
### Functions and execution to run the scraping
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
def getinfofromtable(oddrows:list,score:float,headertable)->list:
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
def getinfocosmic(mutationid):
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
|
| 56 |
-
|
| 57 |
-
|
| 58 |
|
| 59 |
-
|
| 60 |
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
|
| 66 |
|
| 67 |
|
| 68 |
-
|
| 69 |
|
| 70 |
|
| 71 |
-
|
| 72 |
|
| 73 |
-
|
| 74 |
-
|
| 75 |
|
| 76 |
-
|
| 77 |
-
|
| 78 |
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
### Pieplots
|
| 91 |
def pieplot(merging,id=0):
|
| 92 |
genecount = merging.groupby(by=["gene_name","UV_exposure_tissue","sampleID"]).count().reset_index()
|
|
|
|
| 1 |
import numpy as np
|
| 2 |
import pickle
|
| 3 |
import pandas as pd
|
| 4 |
+
# import requests
|
| 5 |
+
# from selenium import webdriver
|
| 6 |
import matplotlib.pyplot as plt
|
| 7 |
#Simple assignment
|
| 8 |
+
# from selenium.webdriver import Firefox
|
| 9 |
+
# from selenium.webdriver.common.keys import Keys
|
| 10 |
from selenium.common.exceptions import NoSuchElementException
|
| 11 |
import requests
|
| 12 |
import os
|
|
|
|
| 20 |
### Scrape the COSMIC ID information
|
| 21 |
# ### FRAMEWORKS NEEDED
|
| 22 |
|
| 23 |
+
# def scrap():
|
| 24 |
+
# #### Setting options to the driver
|
| 25 |
+
# options = webdriver.FirefoxOptions()
|
| 26 |
+
# options.add_argument('--headless')
|
| 27 |
+
# options.add_argument('--no-sandbox')
|
| 28 |
+
# options.add_argument('--disable-dev-shm-usage')
|
| 29 |
+
# options.capabilities
|
| 30 |
+
# ### Setting options of webdriver
|
| 31 |
+
# # a) Setting the geckodriver (Firefox driver)
|
| 32 |
+
# browser = Firefox(options=options,executable_path=r"C:\Users\Pablo\OneDrive\Documents\Documentos\Escuela Politécnica Superior Leganés\4 AÑO\ASIGNATURAS\1 CUATRI\WEB ANALYTICS\PART 2\Milestone3\geckodriver.exe")
|
| 33 |
+
# ### Functions and execution to run the scraping
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
# def getinfofromtable(oddrows:list,score:float,headertable)->list:
|
| 37 |
+
# rows = []
|
| 38 |
+
# for row in oddrows:
|
| 39 |
+
# cols = []
|
| 40 |
+
# for (i,col) in enumerate(row.find_elements_by_css_selector("td")):
|
| 41 |
+
# if i==headertable.index( 'Primary Tissue') or i==headertable.index('Primary Histology') or i==headertable.index('Zygosity'):
|
| 42 |
+
# cols.append(col.text)
|
| 43 |
+
# cols.append(score)
|
| 44 |
+
# rows.append(cols)
|
| 45 |
+
# return rows
|
| 46 |
+
# def getinfocosmic(mutationid):
|
| 47 |
+
# import time
|
| 48 |
+
# search = browser.find_element_by_id('search-field')
|
| 49 |
+
# search = search.find_element_by_class_name("text_def")
|
| 50 |
+
# search.send_keys(mutationid)
|
| 51 |
+
# search.send_keys(Keys.RETURN)
|
| 52 |
+
# time.sleep(5)
|
| 53 |
+
# try:
|
| 54 |
+
# container = browser.find_element_by_id("section-list")
|
| 55 |
|
| 56 |
+
# except NoSuchElementException:
|
| 57 |
+
# return []
|
| 58 |
|
| 59 |
+
# try:
|
| 60 |
|
| 61 |
+
# subq1 = container.text[container.text.find("score")+len("score"):]
|
| 62 |
+
# score = float(subq1[:subq1.find(")")].strip())
|
| 63 |
+
# except ValueError:
|
| 64 |
+
# score = 0
|
| 65 |
|
| 66 |
|
| 67 |
|
| 68 |
+
# section = browser.find_element_by_id("DataTables_Table_0")
|
| 69 |
|
| 70 |
|
| 71 |
+
# headertable = [header.text for header in section.find_element_by_tag_name("thead").find_elements_by_tag_name("th")]
|
| 72 |
|
| 73 |
+
# oddrows = section.find_elements_by_class_name("odd")
|
| 74 |
+
# evenrows = section.find_elements_by_class_name("even")
|
| 75 |
|
| 76 |
+
# l1 = getinfofromtable(oddrows,score,headertable)
|
| 77 |
+
# l1.extend(getinfofromtable(evenrows,score,headertable))
|
| 78 |
|
| 79 |
+
# # browser.close()
|
| 80 |
+
# return l1
|
| 81 |
+
# ## Looking up the COSMIC ID info
|
| 82 |
+
# cosl = []
|
| 83 |
+
# browser.get("https://cancer.sanger.ac.uk/cosmic")
|
| 84 |
+
# for cos in cosmicinfo.reset_index()["COSMIC_ID"].iloc[20:]:
|
| 85 |
+
# if cos.find(",")!=-1:
|
| 86 |
+
# cos = cos.split(",")[0]
|
| 87 |
+
|
| 88 |
+
# cosl.append(getinfocosmic(cos))
|
| 89 |
+
# browser.get("https://cancer.sanger.ac.uk/cosmic")
|
| 90 |
### Pieplots
|
| 91 |
def pieplot(merging,id=0):
|
| 92 |
genecount = merging.groupby(by=["gene_name","UV_exposure_tissue","sampleID"]).count().reset_index()
|