{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "fd8a6f36", "metadata": {}, "outputs": [], "source": [
 "import pandas as pd\n",
 "\n",
 "# Load the EGRET K562 table. As displayed below, it has an 'ID' column\n",
 "# followed by one numeric column per ENSG-named gene.\n",
 "process_genes = pd.read_csv(filepath_or_buffer=\"./pert_folder/EGRET_K562.csv\")"
 ] }, { "cell_type": "code", "execution_count": 2, "id": "81f7adc6", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IDENSG00000000419ENSG00000000457ENSG00000000938ENSG00000001036ENSG00000001084ENSG00000001561ENSG00000001617ENSG00000001629ENSG00000002079...ENSG00000273473ENSG00000273474ENSG00000273476ENSG00000273477ENSG00000273478ENSG00000273481ENSG00000273483ENSG00000273486ENSG00000273488ENSG00000273489
0AHR-1.504834-2.0044402.203951-1.287421-1.978774-1.583057-0.1681060.6181841.489755...-0.807384-1.453073-1.342309-1.0032371.739622-0.806837-0.741710-1.416639-1.595790-1.287100
1AIRE2.4511822.8988781.3075462.4343291.955148-0.007879-2.2794702.828862-1.137716...-1.4796862.307794-1.029295-0.8770290.091691-1.3825221.5932751.8125760.022190-0.450923
2ALX12.6168402.772501-1.397472-0.1686172.128705-0.0176280.4148662.6986621.795671...-1.344753-0.316649-0.929455-0.7659702.7554661.7035041.879228-0.8005862.708451-0.603309
3ALX30.3479622.774667-1.6027250.1129852.3596093.024013-2.4983062.4974351.798003...1.7465842.7733642.1196702.275466-0.3763961.7951572.0877782.163802-0.182214-0.701738
4ALX40.5124462.902977-1.5147370.2903672.5519552.952138-2.3988322.6412671.990105...1.8205072.858623-0.6819972.397084-0.4493761.6626132.1887242.135710-0.063781-0.744507
..................................................................
682ZNF7841.7170361.4656662.140282-0.762429-1.601689-1.301235-0.352277-1.6845731.727677...-1.035371-1.259863-1.347623-0.765765-1.194990-0.849804-0.636089-1.549853-1.125604-0.983317
683ZSCAN101.225540-1.178199-0.326419-0.9958451.207252-1.3465522.447934-1.5497421.490069...-1.225386-1.667570-1.3217661.866304-0.810302-0.911718-0.905080-1.759080-0.961533-0.874555
684ZSCAN16-0.3822822.653422-0.900343-0.1488252.094933-0.3996581.1900762.542048-0.953970...-1.305299-0.5334801.726591-0.6200792.6061241.8148092.019223-1.058394-0.237949-0.713701
685ZSCAN262.973132-0.638010-1.4055392.9528092.388155-0.502097-1.8229402.028961-1.249959...-0.774847-0.251140-0.619712-0.290817-1.060838-0.861441-0.239722-0.578209-0.5768642.366263
686ZSCAN4-0.985399-0.0953602.101043-0.6318671.801044-0.6410271.8489442.5984622.030291...-1.5429301.6864641.476578-0.8649392.815260-0.963962-0.937383-1.5024222.509741-0.553535
\n", "

687 rows × 22937 columns

\n", "
" ], "text/plain": [ " ID ENSG00000000419 ENSG00000000457 ENSG00000000938 \\\n", "0 AHR -1.504834 -2.004440 2.203951 \n", "1 AIRE 2.451182 2.898878 1.307546 \n", "2 ALX1 2.616840 2.772501 -1.397472 \n", "3 ALX3 0.347962 2.774667 -1.602725 \n", "4 ALX4 0.512446 2.902977 -1.514737 \n", ".. ... ... ... ... \n", "682 ZNF784 1.717036 1.465666 2.140282 \n", "683 ZSCAN10 1.225540 -1.178199 -0.326419 \n", "684 ZSCAN16 -0.382282 2.653422 -0.900343 \n", "685 ZSCAN26 2.973132 -0.638010 -1.405539 \n", "686 ZSCAN4 -0.985399 -0.095360 2.101043 \n", "\n", " ENSG00000001036 ENSG00000001084 ENSG00000001561 ENSG00000001617 \\\n", "0 -1.287421 -1.978774 -1.583057 -0.168106 \n", "1 2.434329 1.955148 -0.007879 -2.279470 \n", "2 -0.168617 2.128705 -0.017628 0.414866 \n", "3 0.112985 2.359609 3.024013 -2.498306 \n", "4 0.290367 2.551955 2.952138 -2.398832 \n", ".. ... ... ... ... \n", "682 -0.762429 -1.601689 -1.301235 -0.352277 \n", "683 -0.995845 1.207252 -1.346552 2.447934 \n", "684 -0.148825 2.094933 -0.399658 1.190076 \n", "685 2.952809 2.388155 -0.502097 -1.822940 \n", "686 -0.631867 1.801044 -0.641027 1.848944 \n", "\n", " ENSG00000001629 ENSG00000002079 ... ENSG00000273473 ENSG00000273474 \\\n", "0 0.618184 1.489755 ... -0.807384 -1.453073 \n", "1 2.828862 -1.137716 ... -1.479686 2.307794 \n", "2 2.698662 1.795671 ... -1.344753 -0.316649 \n", "3 2.497435 1.798003 ... 1.746584 2.773364 \n", "4 2.641267 1.990105 ... 1.820507 2.858623 \n", ".. ... ... ... ... ... \n", "682 -1.684573 1.727677 ... -1.035371 -1.259863 \n", "683 -1.549742 1.490069 ... -1.225386 -1.667570 \n", "684 2.542048 -0.953970 ... -1.305299 -0.533480 \n", "685 2.028961 -1.249959 ... -0.774847 -0.251140 \n", "686 2.598462 2.030291 ... 
-1.542930 1.686464 \n", "\n", " ENSG00000273476 ENSG00000273477 ENSG00000273478 ENSG00000273481 \\\n", "0 -1.342309 -1.003237 1.739622 -0.806837 \n", "1 -1.029295 -0.877029 0.091691 -1.382522 \n", "2 -0.929455 -0.765970 2.755466 1.703504 \n", "3 2.119670 2.275466 -0.376396 1.795157 \n", "4 -0.681997 2.397084 -0.449376 1.662613 \n", ".. ... ... ... ... \n", "682 -1.347623 -0.765765 -1.194990 -0.849804 \n", "683 -1.321766 1.866304 -0.810302 -0.911718 \n", "684 1.726591 -0.620079 2.606124 1.814809 \n", "685 -0.619712 -0.290817 -1.060838 -0.861441 \n", "686 1.476578 -0.864939 2.815260 -0.963962 \n", "\n", " ENSG00000273483 ENSG00000273486 ENSG00000273488 ENSG00000273489 \n", "0 -0.741710 -1.416639 -1.595790 -1.287100 \n", "1 1.593275 1.812576 0.022190 -0.450923 \n", "2 1.879228 -0.800586 2.708451 -0.603309 \n", "3 2.087778 2.163802 -0.182214 -0.701738 \n", "4 2.188724 2.135710 -0.063781 -0.744507 \n", ".. ... ... ... ... \n", "682 -0.636089 -1.549853 -1.125604 -0.983317 \n", "683 -0.905080 -1.759080 -0.961533 -0.874555 \n", "684 2.019223 -1.058394 -0.237949 -0.713701 \n", "685 -0.239722 -0.578209 -0.576864 2.366263 \n", "686 -0.937383 -1.502422 2.509741 -0.553535 \n", "\n", "[687 rows x 22937 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "process_genes" ] },
 { "cell_type": "code", "execution_count": 3, "id": "ce6bd741", "metadata": {}, "outputs": [], "source": [
 "# NOTE(review): read_pickle unpickles arbitrary objects; only load this file\n",
 "# if it comes from a trusted source.\n",
 "map_dict = pd.read_pickle(\"./map_info_gene_adamson.pkl\")"
 ] },
 { "cell_type": "code", "execution_count": 9, "id": "ed022d57", "metadata": {}, "outputs": [], "source": [
 "# Keep 'ID' plus every column whose name is a key of map_dict.\n",
 "# Walk the frame's own columns instead of a set intersection: iterating a set\n",
 "# of strings has no stable order across interpreter sessions, so the column\n",
 "# layout (and the CSV written from this frame) would not be reproducible.\n",
 "mapped_columns = [col for col in process_genes.columns if col != 'ID' and col in map_dict.keys()]\n",
 "process_genes = process_genes.loc[:, ['ID'] + mapped_columns]"
 ] },
 { "cell_type": "code", "execution_count": 10, "id": "53b8b083", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IDENSG00000175911ENSG00000251562ENSG00000111726ENSG00000245025ENSG00000159239ENSG00000244184ENSG00000011426ENSG00000203279ENSG00000115226...ENSG00000138297ENSG00000157350ENSG00000174564ENSG00000251131ENSG00000103723ENSG00000227533ENSG00000204482ENSG00000031698ENSG00000179456ENSG00000261758
0AHR-1.1871840.9248381.151384-0.246859-0.915509-0.055702-2.058460-1.217151-0.671188...-0.6635812.434617-0.990983-0.9283092.2106863.132406-0.9807591.917720-1.654584-0.111339
1AIRE-0.139478-0.4897562.716674-1.4565041.570967-2.2792452.300818-1.3141151.584288...1.0766140.9781981.0241472.3182480.757521-1.4855911.2876091.6340602.869887-1.595758
2ALX12.6840112.1463972.703523-1.3234351.5710690.7972802.299649-1.3003321.518849...-1.280896-1.7983421.216875-0.792937-1.871365-1.5475771.1238411.8014010.068790-1.556525
3ALX3-0.5377251.9241342.905174-1.6293451.423508-1.8915582.534882-1.3525431.601995...-0.817488-2.177483-1.453674-0.8670320.894066-1.567094-1.6159241.8541012.643477-1.646082
4ALX4-0.589713-0.5870482.918784-1.5782101.493895-1.8518712.686122-1.2830501.578009...-0.530944-2.0988121.572730-0.8452110.897063-1.699489-1.5634641.9447052.670457-1.761020
..................................................................
682ZNF784-0.993021-1.421627-1.1780562.679437-0.861058-0.646448-1.639089-1.006553-0.913538...-0.6440762.4516822.111256-0.827870-0.843621-0.691026-1.188473-0.655782-1.360230-0.325992
683ZSCAN10-0.741595-1.3507101.344932-0.290858-0.546372-0.852250-1.906668-0.917551-0.697111...-1.1257730.022806-0.977870-0.5867142.080375-0.5303521.9871561.8627701.545124-0.365679
684ZSCAN16-0.3116292.0113282.580312-1.027307-0.8988111.1586162.2706301.7865281.718342...-1.053085-1.0957601.656072-0.6957861.2483231.9413831.5016752.0853332.612452-1.443156
685ZSCAN26-1.0269941.914048-0.460608-0.965757-1.140439-0.891246-0.873975-0.994138-1.222359...2.860900-1.6746812.265212-0.840547-1.380187-1.200555-1.500993-0.783335-0.810816-0.857389
686ZSCAN4-0.0273352.1117312.439006-0.910507-0.5666611.0443062.010582-0.893314-0.681895...-1.598408-0.4936671.540363-0.7053041.666232-1.126338-1.011217-0.4655642.609020-1.161027
\n", "

687 rows × 3347 columns

\n", "
" ], "text/plain": [ " ID ENSG00000175911 ENSG00000251562 ENSG00000111726 \\\n", "0 AHR -1.187184 0.924838 1.151384 \n", "1 AIRE -0.139478 -0.489756 2.716674 \n", "2 ALX1 2.684011 2.146397 2.703523 \n", "3 ALX3 -0.537725 1.924134 2.905174 \n", "4 ALX4 -0.589713 -0.587048 2.918784 \n", ".. ... ... ... ... \n", "682 ZNF784 -0.993021 -1.421627 -1.178056 \n", "683 ZSCAN10 -0.741595 -1.350710 1.344932 \n", "684 ZSCAN16 -0.311629 2.011328 2.580312 \n", "685 ZSCAN26 -1.026994 1.914048 -0.460608 \n", "686 ZSCAN4 -0.027335 2.111731 2.439006 \n", "\n", " ENSG00000245025 ENSG00000159239 ENSG00000244184 ENSG00000011426 \\\n", "0 -0.246859 -0.915509 -0.055702 -2.058460 \n", "1 -1.456504 1.570967 -2.279245 2.300818 \n", "2 -1.323435 1.571069 0.797280 2.299649 \n", "3 -1.629345 1.423508 -1.891558 2.534882 \n", "4 -1.578210 1.493895 -1.851871 2.686122 \n", ".. ... ... ... ... \n", "682 2.679437 -0.861058 -0.646448 -1.639089 \n", "683 -0.290858 -0.546372 -0.852250 -1.906668 \n", "684 -1.027307 -0.898811 1.158616 2.270630 \n", "685 -0.965757 -1.140439 -0.891246 -0.873975 \n", "686 -0.910507 -0.566661 1.044306 2.010582 \n", "\n", " ENSG00000203279 ENSG00000115226 ... ENSG00000138297 ENSG00000157350 \\\n", "0 -1.217151 -0.671188 ... -0.663581 2.434617 \n", "1 -1.314115 1.584288 ... 1.076614 0.978198 \n", "2 -1.300332 1.518849 ... -1.280896 -1.798342 \n", "3 -1.352543 1.601995 ... -0.817488 -2.177483 \n", "4 -1.283050 1.578009 ... -0.530944 -2.098812 \n", ".. ... ... ... ... ... \n", "682 -1.006553 -0.913538 ... -0.644076 2.451682 \n", "683 -0.917551 -0.697111 ... -1.125773 0.022806 \n", "684 1.786528 1.718342 ... -1.053085 -1.095760 \n", "685 -0.994138 -1.222359 ... 2.860900 -1.674681 \n", "686 -0.893314 -0.681895 ... 
-1.598408 -0.493667 \n", "\n", " ENSG00000174564 ENSG00000251131 ENSG00000103723 ENSG00000227533 \\\n", "0 -0.990983 -0.928309 2.210686 3.132406 \n", "1 1.024147 2.318248 0.757521 -1.485591 \n", "2 1.216875 -0.792937 -1.871365 -1.547577 \n", "3 -1.453674 -0.867032 0.894066 -1.567094 \n", "4 1.572730 -0.845211 0.897063 -1.699489 \n", ".. ... ... ... ... \n", "682 2.111256 -0.827870 -0.843621 -0.691026 \n", "683 -0.977870 -0.586714 2.080375 -0.530352 \n", "684 1.656072 -0.695786 1.248323 1.941383 \n", "685 2.265212 -0.840547 -1.380187 -1.200555 \n", "686 1.540363 -0.705304 1.666232 -1.126338 \n", "\n", " ENSG00000204482 ENSG00000031698 ENSG00000179456 ENSG00000261758 \n", "0 -0.980759 1.917720 -1.654584 -0.111339 \n", "1 1.287609 1.634060 2.869887 -1.595758 \n", "2 1.123841 1.801401 0.068790 -1.556525 \n", "3 -1.615924 1.854101 2.643477 -1.646082 \n", "4 -1.563464 1.944705 2.670457 -1.761020 \n", ".. ... ... ... ... \n", "682 -1.188473 -0.655782 -1.360230 -0.325992 \n", "683 1.987156 1.862770 1.545124 -0.365679 \n", "684 1.501675 2.085333 2.612452 -1.443156 \n", "685 -1.500993 -0.783335 -0.810816 -0.857389 \n", "686 -1.011217 -0.465564 2.609020 -1.161027 \n", "\n", "[687 rows x 3347 columns]" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "process_genes" ] },
 { "cell_type": "code", "execution_count": 11, "id": "f8c39dc3", "metadata": {}, "outputs": [], "source": [
 "# Translate every column name after 'ID' through map_dict.\n",
 "col_new = [map_dict[ensembl_id] for ensembl_id in process_genes.columns[1:]]"
 ] },
 { "cell_type": "code", "execution_count": 12, "id": "689b8124", "metadata": {}, "outputs": [], "source": [
 "# NOTE(review): if two columns map to the same name, the read_csv reload will\n",
 "# rename the repeats (e.g. 'X.1') - confirm the mapped names are unique.\n",
 "process_genes.columns = ['ID', *col_new]"
 ] },
 { "cell_type": "code", "execution_count": 19, "id": "4be24ff1", "metadata": {}, "outputs": [], "source": [
 "# index=False (the documented boolean) instead of relying on None being falsy:\n",
 "# the row index is just 0..N-1 and 'ID' is already a regular column.\n",
 "process_genes.to_csv(\"./k562_processed_grn.csv\", index=False)"
 ] },
 { "cell_type": "code", "execution_count": 20, "id": "ca259434", "metadata": {}, "outputs": [], "source": [
 "# Reload the exported table with its first column ('ID') as the row index.\n",
 "process_genes_new = pd.read_csv(\"./k562_processed_grn.csv\", index_col=0)"
 ] },
 { "cell_type": "code", "execution_count": 21, "id": "29166db6", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AC127496.1MALAT1CMASRP11-875O11.1C2orf81RP11-314A20.2ANLNRP11-498P14.5FNDC4USH1G...TIMM23ST3GAL2IL20RBCTD-2035E11.3AP3B2SLC2A1-AS1LST1SARSZBTB18RP11-102M11.2
ID
AHR-1.1871840.9248381.151384-0.246859-0.915509-0.055702-2.058460-1.217151-0.6711883.868693...-0.6635812.434617-0.990983-0.9283092.2106863.132406-0.9807591.917720-1.654584-0.111339
AIRE-0.139478-0.4897562.716674-1.4565041.570967-2.2792452.300818-1.3141151.584288-2.037681...1.0766140.9781981.0241472.3182480.757521-1.4855911.2876091.6340602.869887-1.595758
ALX12.6840112.1463972.703523-1.3234351.5710690.7972802.299649-1.3003321.518849-1.836830...-1.280896-1.7983421.216875-0.792937-1.871365-1.5475771.1238411.8014010.068790-1.556525
ALX3-0.5377251.9241342.905174-1.6293451.423508-1.8915582.534882-1.3525431.601995-1.815563...-0.817488-2.177483-1.453674-0.8670320.894066-1.567094-1.6159241.8541012.643477-1.646082
ALX4-0.589713-0.5870482.918784-1.5782101.493895-1.8518712.686122-1.2830501.578009-1.924612...-0.530944-2.0988121.572730-0.8452110.897063-1.699489-1.5634641.9447052.670457-1.761020
..................................................................
ZNF784-0.993021-1.421627-1.1780562.679437-0.861058-0.646448-1.639089-1.006553-0.913538-0.087599...-0.6440762.4516822.111256-0.827870-0.843621-0.691026-1.188473-0.655782-1.360230-0.325992
ZSCAN10-0.741595-1.3507101.344932-0.290858-0.546372-0.852250-1.906668-0.917551-0.697111-0.147886...-1.1257730.022806-0.977870-0.5867142.080375-0.5303521.9871561.8627701.545124-0.365679
ZSCAN16-0.3116292.0113282.580312-1.027307-0.8988111.1586162.2706301.7865281.718342-1.427522...-1.053085-1.0957601.656072-0.6957861.2483231.9413831.5016752.0853332.612452-1.443156
ZSCAN26-1.0269941.914048-0.460608-0.965757-1.140439-0.891246-0.873975-0.994138-1.222359-1.054876...2.860900-1.6746812.265212-0.840547-1.380187-1.200555-1.500993-0.783335-0.810816-0.857389
ZSCAN4-0.0273352.1117312.439006-0.910507-0.5666611.0443062.010582-0.893314-0.681895-1.171078...-1.598408-0.4936671.540363-0.7053041.666232-1.126338-1.011217-0.4655642.609020-1.161027
\n", "

687 rows × 3346 columns

\n", "
" ], "text/plain": [ " AC127496.1 MALAT1 CMAS RP11-875O11.1 C2orf81 \\\n", "ID \n", "AHR -1.187184 0.924838 1.151384 -0.246859 -0.915509 \n", "AIRE -0.139478 -0.489756 2.716674 -1.456504 1.570967 \n", "ALX1 2.684011 2.146397 2.703523 -1.323435 1.571069 \n", "ALX3 -0.537725 1.924134 2.905174 -1.629345 1.423508 \n", "ALX4 -0.589713 -0.587048 2.918784 -1.578210 1.493895 \n", "... ... ... ... ... ... \n", "ZNF784 -0.993021 -1.421627 -1.178056 2.679437 -0.861058 \n", "ZSCAN10 -0.741595 -1.350710 1.344932 -0.290858 -0.546372 \n", "ZSCAN16 -0.311629 2.011328 2.580312 -1.027307 -0.898811 \n", "ZSCAN26 -1.026994 1.914048 -0.460608 -0.965757 -1.140439 \n", "ZSCAN4 -0.027335 2.111731 2.439006 -0.910507 -0.566661 \n", "\n", " RP11-314A20.2 ANLN RP11-498P14.5 FNDC4 USH1G ... \\\n", "ID ... \n", "AHR -0.055702 -2.058460 -1.217151 -0.671188 3.868693 ... \n", "AIRE -2.279245 2.300818 -1.314115 1.584288 -2.037681 ... \n", "ALX1 0.797280 2.299649 -1.300332 1.518849 -1.836830 ... \n", "ALX3 -1.891558 2.534882 -1.352543 1.601995 -1.815563 ... \n", "ALX4 -1.851871 2.686122 -1.283050 1.578009 -1.924612 ... \n", "... ... ... ... ... ... ... \n", "ZNF784 -0.646448 -1.639089 -1.006553 -0.913538 -0.087599 ... \n", "ZSCAN10 -0.852250 -1.906668 -0.917551 -0.697111 -0.147886 ... \n", "ZSCAN16 1.158616 2.270630 1.786528 1.718342 -1.427522 ... \n", "ZSCAN26 -0.891246 -0.873975 -0.994138 -1.222359 -1.054876 ... \n", "ZSCAN4 1.044306 2.010582 -0.893314 -0.681895 -1.171078 ... \n", "\n", " TIMM23 ST3GAL2 IL20RB CTD-2035E11.3 AP3B2 SLC2A1-AS1 \\\n", "ID \n", "AHR -0.663581 2.434617 -0.990983 -0.928309 2.210686 3.132406 \n", "AIRE 1.076614 0.978198 1.024147 2.318248 0.757521 -1.485591 \n", "ALX1 -1.280896 -1.798342 1.216875 -0.792937 -1.871365 -1.547577 \n", "ALX3 -0.817488 -2.177483 -1.453674 -0.867032 0.894066 -1.567094 \n", "ALX4 -0.530944 -2.098812 1.572730 -0.845211 0.897063 -1.699489 \n", "... ... ... ... ... ... ... 
\n", "ZNF784 -0.644076 2.451682 2.111256 -0.827870 -0.843621 -0.691026 \n", "ZSCAN10 -1.125773 0.022806 -0.977870 -0.586714 2.080375 -0.530352 \n", "ZSCAN16 -1.053085 -1.095760 1.656072 -0.695786 1.248323 1.941383 \n", "ZSCAN26 2.860900 -1.674681 2.265212 -0.840547 -1.380187 -1.200555 \n", "ZSCAN4 -1.598408 -0.493667 1.540363 -0.705304 1.666232 -1.126338 \n", "\n", " LST1 SARS ZBTB18 RP11-102M11.2 \n", "ID \n", "AHR -0.980759 1.917720 -1.654584 -0.111339 \n", "AIRE 1.287609 1.634060 2.869887 -1.595758 \n", "ALX1 1.123841 1.801401 0.068790 -1.556525 \n", "ALX3 -1.615924 1.854101 2.643477 -1.646082 \n", "ALX4 -1.563464 1.944705 2.670457 -1.761020 \n", "... ... ... ... ... \n", "ZNF784 -1.188473 -0.655782 -1.360230 -0.325992 \n", "ZSCAN10 1.987156 1.862770 1.545124 -0.365679 \n", "ZSCAN16 1.501675 2.085333 2.612452 -1.443156 \n", "ZSCAN26 -1.500993 -0.783335 -0.810816 -0.857389 \n", "ZSCAN4 -1.011217 -0.465564 2.609020 -1.161027 \n", "\n", "[687 rows x 3346 columns]" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "process_genes_new" ] },
 { "cell_type": "code", "execution_count": 25, "id": "2ca5b697", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "ID\n", "ZNF32 3.053641\n", "PRRX1 3.026179\n", "ISX 3.007872\n", "HESX1 3.001970\n", "UNCX 2.998282\n", "LHX2 2.973517\n", "ARX 2.968436\n", "HMX2 2.965731\n", "DLX2 2.952591\n", "HOXB5 2.951802\n", "Name: CMAS, dtype: float64" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Ten rows with the largest value in the CMAS column, highest first.\n",
 "process_genes_new['CMAS'].sort_values(ascending=False).head(10)"
 ] },
 { "cell_type": "code", "execution_count": 38, "id": "272d5efd", "metadata": {}, "outputs": [], "source": [
 "# Render the same top-10 selection as text: a header line, then one\n",
 "# 'name score' line per row, each terminated by a newline.\n",
 "finditem = process_genes_new['CMAS'].sort_values(ascending=False).head(10)\n",
 "table_rows = ['''GeneName Score\\n''']\n",
 "table_rows.extend(f'''{name} {sten}\\n''' for name, sten in zip(finditem.index, finditem.values))\n",
 "searchinfo = ''.join(table_rows)"
 ] },
 { "cell_type": "code", "execution_count": 39, "id": "91bad66f", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "GeneName Score\n", "ZNF32 3.05364054333144\n", "PRRX1 3.02617869847161\n", "ISX 3.00787214431829\n", "HESX1 3.00196977734313\n", "UNCX 2.99828209688764\n", "LHX2 2.97351668677308\n", "ARX 2.9684361257728\n", "HMX2 2.96573101463111\n", "DLX2 2.95259128389934\n", "HOXB5 2.95180181103979\n" ] } ], "source": [
 "# Drop the final character (the trailing newline) before printing.\n",
 "print(searchinfo[:-1])"
 ] },
 { "cell_type": "code", "execution_count": 30, "id": "0231662c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['ZNF32', 'PRRX1', 'ISX', 'HESX1', 'UNCX', 'LHX2', 'ARX', 'HMX2', 'DLX2',\n", " 'HOXB5'],\n", " dtype='object', name='ID')" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Row labels only for the same top-10 selection.\n",
 "process_genes_new['CMAS'].sort_values(ascending=False).head(10).index"
 ] },
 { "cell_type": "code", "execution_count": 1, "id": "2025e389", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/site-packages/PyPDF2/__init__.py:21: DeprecationWarning: PyPDF2 is deprecated. 
Please move to the pypdf library instead.\n", " warnings.warn(\n" ] } ], "source": [
 "from evoagentx.tools import PertToolkit\n",
 "\n",
 "# Build the toolkit that provides the `pert_search` tool used in the next cell\n",
 "toolkit = PertToolkit()"
 ] },
 { "cell_type": "code", "execution_count": null, "id": "284d662d", "metadata": {}, "outputs": [], "source": [
 "# Get the perturbation search tool\n",
 "search_tool = toolkit.get_tool(\"pert_search\")\n",
 "\n",
 "# Query the tool for one gene in one cell line\n",
 "results = search_tool(\n",
 "    gene_name = 'CMAS', cell_line = 'K562'\n",
 ")\n",
 "\n",
 "# In the recorded run the tool returned {'results': <str>} (a single text\n",
 "# report), not a list of {'title', 'url', 'content'} dicts, so indexing each\n",
 "# item by key raised TypeError. Print a text payload as-is; enumerate\n",
 "# anything else.\n",
 "payload = results.get(\"results\", \"\")\n",
 "if isinstance(payload, str):\n",
 "    print(payload)\n",
 "else:\n",
 "    for i, result in enumerate(payload, start=1):\n",
 "        print(f\"Result {i}: {result}\")"
 ] },
 { "cell_type": "code", "execution_count": 5, "id": "13ead2a1", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'results': 'The detected gene list and gene regulatory strength is: GeneName Score\\nZNF32 3.05364054333144\\nPRRX1 3.02617869847161\\nISX 3.00787214431829\\nHESX1 3.00196977734313\\nUNCX 2.99828209688764\\nLHX2 2.97351668677308\\nARX 2.9684361257728\\nHMX2 2.96573101463111\\nDLX2 2.95259128389934\\nHOXB5 2.95180181103979\\n'}" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "results" ] },
 { "cell_type": "code", "execution_count": null, "id": "04eb9278", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.13" } }, "nbformat": 4, "nbformat_minor": 5 }