From c29d13dec60e336d5f034e07836c237fed70108b Mon Sep 17 00:00:00 2001 From: Niko Papadopoulos <nikolaos.papadopoulos@embl.de> Date: Tue, 24 Jan 2023 16:52:20 +0100 Subject: [PATCH] looked at EC of second-best morphologs and why they sometimes disagree; looked at available choanos for HGT candidates --- analysis/revision-hgt-outgroup.ipynb | 284 ++++++++++ .../revision-second_best_morpholog-run.ipynb | 529 +++++++++++++++++- analysis/suppl-horizontal_gene_transfer.ipynb | 2 +- 3 files changed, 808 insertions(+), 7 deletions(-) create mode 100644 analysis/revision-hgt-outgroup.ipynb diff --git a/analysis/revision-hgt-outgroup.ipynb b/analysis/revision-hgt-outgroup.ipynb new file mode 100644 index 0000000..0856d47 --- /dev/null +++ b/analysis/revision-hgt-outgroup.ipynb @@ -0,0 +1,284 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "19a33b01-d8d4-49de-9e05-302c14fb7c42", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2023-01-24 16:43\n" + ] + } + ], + "source": [ + "from datetime import datetime, timezone\n", + "import pandas as pd\n", + "import pytz\n", + "\n", + "utc_dt = datetime.now(timezone.utc) # UTC time\n", + "dt = utc_dt.astimezone()\n", + "tz = pytz.timezone('Europe/Berlin')\n", + "berlin_now = datetime.now(tz)\n", + "print(f'{berlin_now:%Y-%m-%d %H:%M}')" + ] + }, + { + "cell_type": "markdown", + "id": "55f808fc-0a34-48d5-96b9-659d27f16f13", + "metadata": {}, + "source": [ + "The reviewers challenged us to look for the HGT candidates in the nearest non-metazoan outgroup, choanoflagellates. We are using _Salpingoeca rosetta_ and _Monosiga brevicollis_, two model choanoflagellates with publicly available genomes." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "c4ad4dc2-a115-4679-8947-c6ed78582bbd", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + "100 9376k 0 9376k 0 0 1607k 0 --:--:-- 0:00:05 --:--:-- 1472k\n", + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + "100 6562k 0 6562k 0 0 2390k 0 --:--:-- 0:00:02 --:--:-- 2394k\n" + ] + } + ], + "source": [ + "!curl \"https://rest.uniprot.org/uniprotkb/stream?format=fasta&query=%28%28proteome%3AUP000007799%29%29\" -o salpingoeca.faa\n", + "!curl \"https://rest.uniprot.org/uniprotkb/stream?format=fasta&query=%28%28proteome%3AUP000001357%29%29\" -o monosiga.faa\n", + "!rm -rf tmp/" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "aae09518-9f38-4c3e-90e0-f8595589e748", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "%%bash --out salpingoeca.out --err salpingoeca.err\n", + "hgt_candidates=\"/Users/npapadop/Documents/data/coffe/hgt.pep\"\n", + "mmseqs easy-search ${hgt_candidates} \"./salpingoeca.faa\" salpingoeca.m8 tmp --search-type 2" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "9f1aa456-1f6f-4523-b923-763b603b649c", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "%%bash --out monosiga.out --err monosiga.err\n", + "hgt_candidates=\"/Users/npapadop/Documents/data/coffe/hgt.pep\"\n", + "mmseqs easy-search ${hgt_candidates} \"./monosiga.faa\" monosiga.m8 tmp --search-type 2" + ] + }, + { + "cell_type": "markdown", + "id": "80982650-856c-4b18-be0e-501b240533ab", + "metadata": {}, + "source": [ + "Let's have a look at the results:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "169716b0-3ce2-4eb7-8e53-915c99ec5122", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + 
"<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>0</th>\n", + " <th>1</th>\n", + " <th>2</th>\n", + " <th>3</th>\n", + " <th>4</th>\n", + " <th>5</th>\n", + " <th>6</th>\n", + " <th>7</th>\n", + " <th>8</th>\n", + " <th>9</th>\n", + " <th>10</th>\n", + " <th>11</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>c103983_g1_i1_m.71422,</td>\n", + " <td>A9V527</td>\n", + " <td>0.251</td>\n", + " <td>505</td>\n", + " <td>370</td>\n", + " <td>0</td>\n", + " <td>65</td>\n", + " <td>569</td>\n", + " <td>66</td>\n", + " <td>560</td>\n", + " <td>3.201000e-14</td>\n", + " <td>76</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " 0 1 2 3 4 5 6 7 8 9 \\\n", + "0 c103983_g1_i1_m.71422, A9V527 0.251 505 370 0 65 569 66 560 \n", + "\n", + " 10 11 \n", + "0 3.201000e-14 76 " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.read_csv(\"monosiga.m8\", sep=\"\\t\", header=None)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "8c324422-faed-4249-89da-cb1dafeac4d1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " 
<th>0</th>\n", + " <th>1</th>\n", + " <th>2</th>\n", + " <th>3</th>\n", + " <th>4</th>\n", + " <th>5</th>\n", + " <th>6</th>\n", + " <th>7</th>\n", + " <th>8</th>\n", + " <th>9</th>\n", + " <th>10</th>\n", + " <th>11</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>c103983_g1_i1_m.71422,</td>\n", + " <td>F2UGB0</td>\n", + " <td>0.253</td>\n", + " <td>490</td>\n", + " <td>354</td>\n", + " <td>0</td>\n", + " <td>63</td>\n", + " <td>552</td>\n", + " <td>159</td>\n", + " <td>633</td>\n", + " <td>2.262000e-16</td>\n", + " <td>83</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " 0 1 2 3 4 5 6 7 8 9 \\\n", + "0 c103983_g1_i1_m.71422, F2UGB0 0.253 490 354 0 63 552 159 633 \n", + "\n", + " 10 11 \n", + "0 2.262000e-16 83 " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.read_csv(\"salpingoeca.m8\", sep=\"\\t\", header=None)" + ] + }, + { + "cell_type": "markdown", + "id": "8184c03c-4741-4499-a346-a31f4e4ed61c", + "metadata": {}, + "source": [ + "In both cases the only relevant hit that is found is c103983_g1, the gene EggNOG v5.0 identifies as \"metal-dependent hydrolase - Proteobacteria\" and MorF putatively identifies as an aminohydrolase." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/analysis/revision-second_best_morpholog-run.ipynb b/analysis/revision-second_best_morpholog-run.ipynb index 114266e..bc753c2 100644 --- a/analysis/revision-second_best_morpholog-run.ipynb +++ b/analysis/revision-second_best_morpholog-run.ipynb @@ -10,7 +10,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "2022-12-21 15:28\n" + "2023-01-24 15:27\n" ] } ], @@ -387,7 +387,7 @@ { "data": { "text/plain": [ - "<matplotlib.collections.PathCollection at 0x2cc05e740>" + "<matplotlib.collections.PathCollection at 0x2ceef8e80>" ] }, "execution_count": 19, @@ -729,7 +729,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 32, "id": "b7f15b68-ef6b-40ad-9368-f1d9a1220b2b", "metadata": {}, "outputs": [ @@ -739,7 +739,7 @@ "0.1088115396676074" ] }, - "execution_count": 33, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } @@ -750,7 +750,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 33, "id": "db54d8dc-302f-4dc3-8abf-03c357796d49", "metadata": {}, "outputs": [ @@ -760,7 +760,7 @@ "0.07933521480087802" ] }, - "execution_count": 38, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" } @@ -769,6 +769,523 @@ "np.sum(top10p_12[~exclude] < 3) / np.sum(top10p_12[~exclude] > 3)" ] }, + { + "cell_type": "code", + "execution_count": 36, + "id": "d1c33949-76e8-4bdd-bbee-99824d43570a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1.217579250720461" + ] + }, + "execution_count": 36, + "metadata": {}, + 
"output_type": "execute_result" + } + ], + "source": [ + "np.mean(top10p_12[~exclude][top10p_12[~exclude] < 4])" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "f3d9b8ec-e7fe-4708-9e90-5377a59b34e7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "query\n", + "132 0\n", + "142 3\n", + "170 0\n", + "215 0\n", + "252 1\n", + " ..\n", + "41535 1\n", + "41596 0\n", + "41621 1\n", + "41703 3\n", + "41934 1\n", + "Length: 694, dtype: int64" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "top10p_12[~exclude][top10p_12[~exclude] < 4]" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "id": "614bc862-d3ae-45bb-bae4-0dd45e9ae3cf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>query</th>\n", + " <th>bit score</th>\n", + " <th>uniprot</th>\n", + " <th>eggNOG_OGs</th>\n", + " <th>Preferred_name</th>\n", + " <th>EC</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>1798577</th>\n", + " <td>41596</td>\n", + " <td>367</td>\n", + " <td>Q22707</td>\n", + " <td>COG5226@1|root,KOG2386@2759|Eukaryota,38WCH@33...</td>\n", + " <td>DUSP11</td>\n", + " <td>3.1.3.16,3.1.3.48</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1798580</th>\n", + " <td>41596</td>\n", + " <td>356</td>\n", + " <td>C0PFH1</td>\n", + " <td>COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33...</td>\n", + " <td>-</td>\n", + " <td>2.7.7.50</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1798581</th>\n", + " <td>41596</td>\n", + " <td>347</td>\n", + " 
<td>B7EQL6</td>\n", + " <td>COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33...</td>\n", + " <td>-</td>\n", + " <td>2.7.7.50</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1798582</th>\n", + " <td>41596</td>\n", + " <td>347</td>\n", + " <td>A0A1D6EGX8</td>\n", + " <td>COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33...</td>\n", + " <td>-</td>\n", + " <td>2.7.7.50</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1798583</th>\n", + " <td>41596</td>\n", + " <td>344</td>\n", + " <td>A0A0H5S9H8</td>\n", + " <td>COG5226@1|root,KOG2386@2759|Eukaryota,3A2XY@33...</td>\n", + " <td>-</td>\n", + " <td>3.1.3.16,3.1.3.48</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1798584</th>\n", + " <td>41596</td>\n", + " <td>343</td>\n", + " <td>Q6NXK5</td>\n", + " <td>COG5226@1|root,KOG2386@2759|Eukaryota,38WCH@33...</td>\n", + " <td>DUSP11</td>\n", + " <td>3.1.3.16,3.1.3.48</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1798585</th>\n", + " <td>41596</td>\n", + " <td>337</td>\n", + " <td>Q2QWJ7</td>\n", + " <td>COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33...</td>\n", + " <td>-</td>\n", + " <td>2.7.7.50</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1798588</th>\n", + " <td>41596</td>\n", + " <td>330</td>\n", + " <td>O75319</td>\n", + " <td>COG5226@1|root,KOG2386@2759|Eukaryota,38WCH@33...</td>\n", + " <td>DUSP11</td>\n", + " <td>3.1.3.16,3.1.3.48</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1798589</th>\n", + " <td>41596</td>\n", + " <td>328</td>\n", + " <td>Q4KM79</td>\n", + " <td>COG5226@1|root,KOG2386@2759|Eukaryota,38WCH@33...</td>\n", + " <td>DUSP11</td>\n", + " <td>3.1.3.16,3.1.3.48</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1798590</th>\n", + " <td>41596</td>\n", + " <td>323</td>\n", + " <td>A0A3P7GI08</td>\n", + " <td>COG5226@1|root,KOG2386@2759|Eukaryota,3A2XY@33...</td>\n", + " <td>-</td>\n", + " <td>3.1.3.16,3.1.3.48</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1798592</th>\n", + " <td>41596</td>\n", + " <td>319</td>\n", + " <td>I1N9F9</td>\n", + " 
<td>COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33...</td>\n", + " <td>-</td>\n", + " <td>2.7.7.50</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1798595</th>\n", + " <td>41596</td>\n", + " <td>296</td>\n", + " <td>E9QD92</td>\n", + " <td>COG5226@1|root,KOG2386@2759|Eukaryota,38WCH@33...</td>\n", + " <td>DUSP11</td>\n", + " <td>3.1.3.16,3.1.3.48</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1798596</th>\n", + " <td>41596</td>\n", + " <td>288</td>\n", + " <td>Q8GSD7</td>\n", + " <td>COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33...</td>\n", + " <td>-</td>\n", + " <td>2.7.7.50</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1798597</th>\n", + " <td>41596</td>\n", + " <td>286</td>\n", + " <td>A0A0R0KQX0</td>\n", + " <td>COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33...</td>\n", + " <td>-</td>\n", + " <td>2.7.7.50</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1798598</th>\n", + " <td>41596</td>\n", + " <td>276</td>\n", + " <td>F4IYM6</td>\n", + " <td>COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33...</td>\n", + " <td>-</td>\n", + " <td>2.7.7.50</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1798599</th>\n", + " <td>41596</td>\n", + " <td>273</td>\n", + " <td>Q8SX38</td>\n", + " <td>COG5226@1|root,KOG2386@2759|Eukaryota,38WCH@33...</td>\n", + " <td>DUSP11</td>\n", + " <td>3.1.3.16,3.1.3.48</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1798600</th>\n", + " <td>41596</td>\n", + " <td>272</td>\n", + " <td>P34442</td>\n", + " <td>COG5226@1|root,KOG2386@2759|Eukaryota,3A2XY@33...</td>\n", + " <td>-</td>\n", + " <td>3.1.3.16,3.1.3.48</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1798601</th>\n", + " <td>41596</td>\n", + " <td>270</td>\n", + " <td>J9BD64</td>\n", + " <td>COG5226@1|root,KOG2386@2759|Eukaryota,38WCH@33...</td>\n", + " <td>DUSP11</td>\n", + " <td>3.1.3.16,3.1.3.48</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1798602</th>\n", + " <td>41596</td>\n", + " <td>265</td>\n", + " <td>K7MTE7</td>\n", + " <td>COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33...</td>\n", + " <td>-</td>\n", + " 
<td>2.7.7.50</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1798603</th>\n", + " <td>41596</td>\n", + " <td>259</td>\n", + " <td>A0A0N4U7Y1</td>\n", + " <td>COG5226@1|root,KOG2386@2759|Eukaryota,3A2XY@33...</td>\n", + " <td>-</td>\n", + " <td>3.1.3.16,3.1.3.48</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1798604</th>\n", + " <td>41596</td>\n", + " <td>259</td>\n", + " <td>K7KDH1</td>\n", + " <td>COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33...</td>\n", + " <td>-</td>\n", + " <td>2.7.7.50</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1798606</th>\n", + " <td>41596</td>\n", + " <td>254</td>\n", + " <td>Q6NY98</td>\n", + " <td>COG5226@1|root,KOG2386@2759|Eukaryota,38DZV@33...</td>\n", + " <td>RNGTT</td>\n", + " <td>1.6.5.3,1.6.99.3,2.7.7.50</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1798607</th>\n", + " <td>41596</td>\n", + " <td>254</td>\n", + " <td>I1KKA0</td>\n", + " <td>COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33...</td>\n", + " <td>-</td>\n", + " <td>2.7.7.50</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1798608</th>\n", + " <td>41596</td>\n", + " <td>250</td>\n", + " <td>K7K355</td>\n", + " <td>COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33...</td>\n", + " <td>-</td>\n", + " <td>2.7.7.50</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1798609</th>\n", + " <td>41596</td>\n", + " <td>246</td>\n", + " <td>O60942</td>\n", + " <td>COG5226@1|root,KOG2386@2759|Eukaryota,38DZV@33...</td>\n", + " <td>RNGTT</td>\n", + " <td>1.6.5.3,1.6.99.3,2.7.7.50</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1798610</th>\n", + " <td>41596</td>\n", + " <td>238</td>\n", + " <td>O55236</td>\n", + " <td>COG5226@1|root,KOG2386@2759|Eukaryota,38DZV@33...</td>\n", + " <td>RNGTT</td>\n", + " <td>1.6.5.3,1.6.99.3,2.7.7.50</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1798611</th>\n", + " <td>41596</td>\n", + " <td>238</td>\n", + " <td>Q9VY44</td>\n", + " <td>COG5226@1|root,KOG2386@2759|Eukaryota,38DZV@33...</td>\n", + " <td>RNGTT</td>\n", + " <td>1.6.5.3,1.6.99.3,2.7.7.50</td>\n", + " </tr>\n", + " 
<tr>\n", + " <th>1798612</th>\n", + " <td>41596</td>\n", + " <td>233</td>\n", + " <td>A0A5K4FAB6</td>\n", + " <td>COG5226@1|root,KOG2386@2759|Eukaryota,38DZV@33...</td>\n", + " <td>RNGTT</td>\n", + " <td>1.6.5.3,1.6.99.3,2.7.7.50</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1798614</th>\n", + " <td>41596</td>\n", + " <td>226</td>\n", + " <td>Q17607</td>\n", + " <td>COG5226@1|root,KOG2386@2759|Eukaryota,38DZV@33...</td>\n", + " <td>RNGTT</td>\n", + " <td>1.6.5.3,1.6.99.3,2.7.7.50</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1798615</th>\n", + " <td>41596</td>\n", + " <td>219</td>\n", + " <td>D3ZH30</td>\n", + " <td>COG5226@1|root,KOG2386@2759|Eukaryota,38DZV@33...</td>\n", + " <td>RNGTT</td>\n", + " <td>1.6.5.3,1.6.99.3,2.7.7.50</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1798616</th>\n", + " <td>41596</td>\n", + " <td>211</td>\n", + " <td>A0A0N4UCR5</td>\n", + " <td>COG5226@1|root,KOG2386@2759|Eukaryota,38DZV@33...</td>\n", + " <td>RNGTT</td>\n", + " <td>1.6.5.3,1.6.99.3,2.7.7.50</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1798618</th>\n", + " <td>41596</td>\n", + " <td>198</td>\n", + " <td>A0A183XJR9</td>\n", + " <td>COG5226@1|root,KOG2386@2759|Eukaryota,38DZV@33...</td>\n", + " <td>RNGTT</td>\n", + " <td>1.6.5.3,1.6.99.3,2.7.7.50</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " query bit score uniprot \\\n", + "1798577 41596 367 Q22707 \n", + "1798580 41596 356 C0PFH1 \n", + "1798581 41596 347 B7EQL6 \n", + "1798582 41596 347 A0A1D6EGX8 \n", + "1798583 41596 344 A0A0H5S9H8 \n", + "1798584 41596 343 Q6NXK5 \n", + "1798585 41596 337 Q2QWJ7 \n", + "1798588 41596 330 O75319 \n", + "1798589 41596 328 Q4KM79 \n", + "1798590 41596 323 A0A3P7GI08 \n", + "1798592 41596 319 I1N9F9 \n", + "1798595 41596 296 E9QD92 \n", + "1798596 41596 288 Q8GSD7 \n", + "1798597 41596 286 A0A0R0KQX0 \n", + "1798598 41596 276 F4IYM6 \n", + "1798599 41596 273 Q8SX38 \n", + "1798600 41596 272 P34442 \n", + "1798601 41596 270 J9BD64 \n", + "1798602 
41596 265 K7MTE7 \n", + "1798603 41596 259 A0A0N4U7Y1 \n", + "1798604 41596 259 K7KDH1 \n", + "1798606 41596 254 Q6NY98 \n", + "1798607 41596 254 I1KKA0 \n", + "1798608 41596 250 K7K355 \n", + "1798609 41596 246 O60942 \n", + "1798610 41596 238 O55236 \n", + "1798611 41596 238 Q9VY44 \n", + "1798612 41596 233 A0A5K4FAB6 \n", + "1798614 41596 226 Q17607 \n", + "1798615 41596 219 D3ZH30 \n", + "1798616 41596 211 A0A0N4UCR5 \n", + "1798618 41596 198 A0A183XJR9 \n", + "\n", + " eggNOG_OGs Preferred_name \\\n", + "1798577 COG5226@1|root,KOG2386@2759|Eukaryota,38WCH@33... DUSP11 \n", + "1798580 COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33... - \n", + "1798581 COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33... - \n", + "1798582 COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33... - \n", + "1798583 COG5226@1|root,KOG2386@2759|Eukaryota,3A2XY@33... - \n", + "1798584 COG5226@1|root,KOG2386@2759|Eukaryota,38WCH@33... DUSP11 \n", + "1798585 COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33... - \n", + "1798588 COG5226@1|root,KOG2386@2759|Eukaryota,38WCH@33... DUSP11 \n", + "1798589 COG5226@1|root,KOG2386@2759|Eukaryota,38WCH@33... DUSP11 \n", + "1798590 COG5226@1|root,KOG2386@2759|Eukaryota,3A2XY@33... - \n", + "1798592 COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33... - \n", + "1798595 COG5226@1|root,KOG2386@2759|Eukaryota,38WCH@33... DUSP11 \n", + "1798596 COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33... - \n", + "1798597 COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33... - \n", + "1798598 COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33... - \n", + "1798599 COG5226@1|root,KOG2386@2759|Eukaryota,38WCH@33... DUSP11 \n", + "1798600 COG5226@1|root,KOG2386@2759|Eukaryota,3A2XY@33... - \n", + "1798601 COG5226@1|root,KOG2386@2759|Eukaryota,38WCH@33... DUSP11 \n", + "1798602 COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33... - \n", + "1798603 COG5226@1|root,KOG2386@2759|Eukaryota,3A2XY@33... - \n", + "1798604 COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33... 
- \n", + "1798606 COG5226@1|root,KOG2386@2759|Eukaryota,38DZV@33... RNGTT \n", + "1798607 COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33... - \n", + "1798608 COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33... - \n", + "1798609 COG5226@1|root,KOG2386@2759|Eukaryota,38DZV@33... RNGTT \n", + "1798610 COG5226@1|root,KOG2386@2759|Eukaryota,38DZV@33... RNGTT \n", + "1798611 COG5226@1|root,KOG2386@2759|Eukaryota,38DZV@33... RNGTT \n", + "1798612 COG5226@1|root,KOG2386@2759|Eukaryota,38DZV@33... RNGTT \n", + "1798614 COG5226@1|root,KOG2386@2759|Eukaryota,38DZV@33... RNGTT \n", + "1798615 COG5226@1|root,KOG2386@2759|Eukaryota,38DZV@33... RNGTT \n", + "1798616 COG5226@1|root,KOG2386@2759|Eukaryota,38DZV@33... RNGTT \n", + "1798618 COG5226@1|root,KOG2386@2759|Eukaryota,38DZV@33... RNGTT \n", + "\n", + " EC \n", + "1798577 3.1.3.16,3.1.3.48 \n", + "1798580 2.7.7.50 \n", + "1798581 2.7.7.50 \n", + "1798582 2.7.7.50 \n", + "1798583 3.1.3.16,3.1.3.48 \n", + "1798584 3.1.3.16,3.1.3.48 \n", + "1798585 2.7.7.50 \n", + "1798588 3.1.3.16,3.1.3.48 \n", + "1798589 3.1.3.16,3.1.3.48 \n", + "1798590 3.1.3.16,3.1.3.48 \n", + "1798592 2.7.7.50 \n", + "1798595 3.1.3.16,3.1.3.48 \n", + "1798596 2.7.7.50 \n", + "1798597 2.7.7.50 \n", + "1798598 2.7.7.50 \n", + "1798599 3.1.3.16,3.1.3.48 \n", + "1798600 3.1.3.16,3.1.3.48 \n", + "1798601 3.1.3.16,3.1.3.48 \n", + "1798602 2.7.7.50 \n", + "1798603 3.1.3.16,3.1.3.48 \n", + "1798604 2.7.7.50 \n", + "1798606 1.6.5.3,1.6.99.3,2.7.7.50 \n", + "1798607 2.7.7.50 \n", + "1798608 2.7.7.50 \n", + "1798609 1.6.5.3,1.6.99.3,2.7.7.50 \n", + "1798610 1.6.5.3,1.6.99.3,2.7.7.50 \n", + "1798611 1.6.5.3,1.6.99.3,2.7.7.50 \n", + "1798612 1.6.5.3,1.6.99.3,2.7.7.50 \n", + "1798614 1.6.5.3,1.6.99.3,2.7.7.50 \n", + "1798615 1.6.5.3,1.6.99.3,2.7.7.50 \n", + "1798616 1.6.5.3,1.6.99.3,2.7.7.50 \n", + "1798618 1.6.5.3,1.6.99.3,2.7.7.50 " + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "slim[slim[\"query\"] == 
41596]" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "3ee9ee24-9274-403e-a269-d43b13c63573", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAiMAAAGdCAYAAADAAnMpAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8o6BhiAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAj1ElEQVR4nO3df1DU953H8ddGYHESINEEWAoqXlJKsJocZOpm/JWSYiVnktaZ2l9qm9o5Gn80csYGcjO5pJPBm+FSYhMhXlHOcVrzx2pqq7HSKT9so9MgMJpWqb0jQskSxvYC0SYLyuf+cNzLys/vinxkeT5mPn98P/v5fL+f75vFffnd77IuY4wRAACAJbfYXgAAAJjcCCMAAMAqwggAALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArIqyvYDR6O/v13vvvae4uDi5XC7bywEAAKNgjNGHH36olJQU3XLL0Nc/JkQYee+995SWlmZ7GQAAIAzt7e1KTU0d8vEJEUbi4uIkXTmZ+Ph4y6sBAACj0dPTo7S0tODr+FAmRBi5+tZMfHw8YQQAgAlmpFssuIEVAABYRRgBAABWEUYAAIBVhBEAAGAVYQQAAFhFGAEAAFYRRgAAgFWEEQAAYBVhBAAAWEUYAQAAVl1XGCkpKZHL5dJTTz017Li6ujplZ2crNjZWs2fPVkVFxfUcFgAARJCww8jbb7+tHTt2aO7cucOOa21tVX5+vhYuXKimpiYVFxdr48aN8vl84R4aAABEkLDCyIULF/SNb3xD//mf/6k77rhj2LEVFRWaMWOGysrKlJmZqbVr1+qJJ55QaWlpWAsGAACRJawwsm7dOj3yyCN6+OGHRxx77Ngx5eXlhfQtXbpUDQ0N6uvrG3ROIBBQT09PSAMAAJEpyumEvXv3qrGxUW+//faoxnd2diopKSmkLykpSZcuXdL58+fl8XgGzCkpKdHzzz/vdGnATWvWMwdHHPPu1kfGYSVjK1LPazxRQ8DhlZH29nZ9//vf1549exQbGzvqeS6XK2TbGDNo/1VFRUXq7u4Otvb2difLBAAAE4ijKyMnTpxQV1eXsrOzg32XL19WfX29XnnlFQUCAU2ZMiVkTnJysjo7O0P6urq6FBUVpenTpw96HLfbLbfb7WRpAABggnIURnJzc3Xq1KmQvm9/+9v6zGc+ox/84AcDgogkeb1e/eIXvwjpO3LkiHJychQdHR3GkgEAQCRxFEbi4uI0Z86ckL5bb71V06dPD/YXFRWpo6NDu3fvliQVFBTolVdeUWFhob773e/q2LFjqqys1M9+9rMxOgUAADCRjflfYPX7/Wprawtup6en69ChQ6qtrdV9992nH/7wh9q2bZtWrFgx1ocGAAATkONP01yrtrY2ZLuqqmrAmMWLF6uxsfF6DwUAACIQ300DAACsIowAAACrCCMAAMAqwggAALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArCKMAAAAqwgjAADAKsIIAACwijACAACsIowAAACrCCMAAMAqwggAALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArCKMAAAAqwgjAADAKsIIAACwijACAACsIowAAACrCCMAAMAqwggAALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArHIURsrLyzV37lzFx8crPj5eXq9Xb7755pDja2tr5XK5BrQzZ85c98IBAEBkiHIyODU1VVu3btXdd98tSfqv//ovPfbYY2pqalJWVtaQ81paWhQfHx/cvuuuu8JcLgAAiDSOwsjy5ctDtl
988UWVl5fr+PHjw4aRxMRE3X777WEtEAAARLaw7xm5fPmy9u7dq4sXL8rr9Q479v7775fH41Fubq5qampG3HcgEFBPT09IAwAAkclxGDl16pRuu+02ud1uFRQUaP/+/br33nsHHevxeLRjxw75fD7t27dPGRkZys3NVX19/bDHKCkpUUJCQrClpaU5XSYAAJggHL1NI0kZGRlqbm7WBx98IJ/PpzVr1qiurm7QQJKRkaGMjIzgttfrVXt7u0pLS7Vo0aIhj1FUVKTCwsLgdk9PD4EEAIAI5TiMxMTEBG9gzcnJ0dtvv62XX35Zr7322qjmz58/X3v27Bl2jNvtltvtdro0AAAwAV333xkxxigQCIx6fFNTkzwez/UeFgAARAhHV0aKi4u1bNkypaWl6cMPP9TevXtVW1urw4cPS7ry9kpHR4d2794tSSorK9OsWbOUlZWl3t5e7dmzRz6fTz6fb+zPBAAATEiOwsj777+vVatWye/3KyEhQXPnztXhw4f1hS98QZLk9/vV1tYWHN/b26vNmzero6NDU6dOVVZWlg4ePKj8/PyxPQsAADBhOQojlZWVwz5eVVUVsr1lyxZt2bLF8aIAAMDkwXfTAAAAqwgjAADAKsIIAACwijACAACsIowAAACrCCMAAMAqwggAALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArCKMAAAAqwgjAADAKsIIAACwijACAACsIowAAACrCCMAAMAqwggAALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArCKMAAAAqwgjAADAKsIIAACwijACAACsIowAAACrCCMAAMAqwggAALCKMAIAAKwijAAAAKschZHy8nLNnTtX8fHxio+Pl9fr1ZtvvjnsnLq6OmVnZys2NlazZ89WRUXFdS0YAABEFkdhJDU1VVu3blVDQ4MaGhr0+c9/Xo899pj+8Ic/DDq+tbVV+fn5WrhwoZqamlRcXKyNGzfK5/ONyeIBAMDEF+Vk8PLly0O2X3zxRZWXl+v48ePKysoaML6iokIzZsxQWVmZJCkzM1MNDQ0qLS3VihUrwl81AACIGGHfM3L58mXt3btXFy9elNfrHXTMsWPHlJeXF9K3dOlSNTQ0qK+vb8h9BwIB9fT0hDQAABCZHF0ZkaRTp07J6/Xq448/1m233ab9+/fr3nvvHXRsZ2enkpKSQvqSkpJ06dIlnT9/Xh6PZ9B5JSUlev75550uDQAAXGPWMwdHHPPu1kfGYSVDc3xlJCMjQ83NzTp+/Li+973vac2aNfrjH/845HiXyxWybYwZtP+TioqK1N3dHWzt7e1OlwkAACYIx1dGYmJidPfdd0uScnJy9Pbbb+vll1/Wa6+9NmBscnKyOjs7Q/q6uroUFRWl6dOnD3kMt9stt9vtdGkAAGACuu6/M2KMUSAQGPQxr9er6urqkL4jR44oJydH0dHR13toAAAQARyFkeLiYh09elTvvvuuTp06pWeffVa1tbX6xje+IenK2yurV68Oji8oKNC5c+dUWFio06dPa+fOnaqsrNTmzZvH9iwAAMCE5ehtmvfff1+rVq2S3+9XQkKC5s6dq8OHD+sLX/iCJMnv96utrS04Pj09XYcOHdKmTZv06quvKiUlRdu2beNjvQAAIMhRGKmsrBz28aqqqgF9ixcvVmNjo6NFAQCAyYPvpgEAAFYRRgAAgFWEEQAAYBVhBAAAWEUYAQAAVhFGAACAVYQRAABgFWEEAABYRRgBAABWEUYAAIBVhBEAAGAVYQQAAFhFGAEAAFYRRgAAgFWEEQAAYBVhBAAAWEUYAQAAVhFGAACAVYQRAABgFWEEAABYRRgBAABWEUYAAIBVhBEAAGAVYQQAAFhFGAEAAFYRRgAAgFWEEQAAYBVhBAAAWEUYAQAAVhFGAACAVYQRAABgFWEEAABYRRgBAABWOQojJSUleuCBBxQXF6fExEQ9/vjjamlpGXZObW2tXC7XgH
bmzJnrWjgAAIgMjsJIXV2d1q1bp+PHj6u6ulqXLl1SXl6eLl68OOLclpYW+f3+YLvnnnvCXjQAAIgcUU4GHz58OGR7165dSkxM1IkTJ7Ro0aJh5yYmJur22293vEAAABDZruueke7ubknStGnTRhx7//33y+PxKDc3VzU1NcOODQQC6unpCWkAACAyhR1GjDEqLCzUggULNGfOnCHHeTwe7dixQz6fT/v27VNGRoZyc3NVX18/5JySkhIlJCQEW1paWrjLBAAANzlHb9N80vr163Xy5En99re/HXZcRkaGMjIygtter1ft7e0qLS0d8q2doqIiFRYWBrd7enoIJAAARKiwroxs2LBBBw4cUE1NjVJTUx3Pnz9/vs6ePTvk4263W/Hx8SENAABEJkdXRowx2rBhg/bv36/a2lqlp6eHddCmpiZ5PJ6w5gIAgMjiKIysW7dOP/3pT/Xzn/9ccXFx6uzslCQlJCRo6tSpkq68xdLR0aHdu3dLksrKyjRr1ixlZWWpt7dXe/bskc/nk8/nG+NTAQAAE5GjMFJeXi5JWrJkSUj/rl279K1vfUuS5Pf71dbWFnyst7dXmzdvVkdHh6ZOnaqsrCwdPHhQ+fn517dyAAAQERy/TTOSqqqqkO0tW7Zoy5YtjhYFAAAmD76bBgAAWEUYAQAAVhFGAACAVYQRAABgFWEEAABYRRgBAABWEUYAAIBVhBEAAGAVYQQAAFhFGAEAAFYRRgAAgFWEEQAAYBVhBAAAWEUYAQAAVhFGAACAVYQRAABgFWEEAABYRRgBAABWEUYAAIBVhBEAAGAVYQQAAFhFGAEAAFYRRgAAgFWEEQAAYBVhBAAAWEUYAQAAVhFGAACAVYQRAABgFWEEAABYRRgBAABWEUYAAIBVhBEAAGAVYQQAAFjlKIyUlJTogQceUFxcnBITE/X444+rpaVlxHl1dXXKzs5WbGysZs+erYqKirAXDAAAIoujMFJXV6d169bp+PHjqq6u1qVLl5SXl6eLFy8OOae1tVX5+flauHChmpqaVFxcrI0bN8rn81334gEAwMQX5WTw4cOHQ7Z37dqlxMREnThxQosWLRp0TkVFhWbMmKGysjJJUmZmphoaGlRaWqoVK1aEt2oAABAxruueke7ubknStGnThhxz7Ngx5eXlhfQtXbpUDQ0N6uvrG3ROIBBQT09PSAMAAJHJ0ZWRTzLGqLCwUAsWLNCcOXOGHNfZ2amkpKSQvqSkJF26dEnnz5+Xx+MZMKekpETPP/98uEub0GY9c3DEMe9ufWQcVoKJaqyeQ6PZz1iZiM/78azPeBrPn8VEfK6O5/NwIv5ehCvsKyPr16/XyZMn9bOf/WzEsS6XK2TbGDNo/1VFRUXq7u4Otvb29nCXCQAAbnJhXRnZsGGDDhw4oPr6eqWmpg47Njk5WZ2dnSF9XV1dioqK0vTp0wed43a75Xa7w1kaAACYYBxdGTHGaP369dq3b59+85vfKD09fcQ5Xq9X1dXVIX1HjhxRTk6OoqOjna0WAABEHEdhZN26ddqzZ49++tOfKi4uTp2dners7NRHH30UHFNUVKTVq1cHtwsKCnTu3DkVFhbq9OnT2rlzpyorK7V58+axOwsAADBhOQoj5eXl6u7u1pIlS+TxeILt9ddfD47x+/1qa2sLbqenp+vQoUOqra3Vfffdpx/+8Ifatm0bH+sFAACSHN4zcvXG0+FUVVUN6Fu8eLEaGxudHAoAAEwSfDcNAACwijACAACsIowAAACrCCMAAMAqwggAALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArCKMAAAAqwgjAADAKsIIAACwijACAACsIowAAACrCCMAAMAqwggAALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArCKMAAAAqwgjAADAKsIIAACwijACAACsIowAAACrCCMAAMAqwggAALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsMpxGK
mvr9fy5cuVkpIil8ulN954Y9jxtbW1crlcA9qZM2fCXTMAAIggUU4nXLx4UfPmzdO3v/1trVixYtTzWlpaFB8fH9y+6667nB4aAABEIMdhZNmyZVq2bJnjAyUmJur22293PA8AAES2cbtn5P7775fH41Fubq5qamqGHRsIBNTT0xPSAABAZLrhYcTj8WjHjh3y+Xzat2+fMjIylJubq/r6+iHnlJSUKCEhIdjS0tJu9DIBAIAljt+mcSojI0MZGRnBba/Xq/b2dpWWlmrRokWDzikqKlJhYWFwu6enh0ACAECEsvLR3vnz5+vs2bNDPu52uxUfHx/SAABAZLISRpqamuTxeGwcGgAA3GQcv01z4cIF/fnPfw5ut7a2qrm5WdOmTdOMGTNUVFSkjo4O7d69W5JUVlamWbNmKSsrS729vdqzZ498Pp98Pt/YnQUAAJiwHIeRhoYGPfTQQ8Htq/d2rFmzRlVVVfL7/Wpraws+3tvbq82bN6ujo0NTp05VVlaWDh48qPz8/DFYPgAAmOgch5ElS5bIGDPk41VVVSHbW7Zs0ZYtWxwvDAAATA58Nw0AALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArCKMAAAAqwgjAADAKsIIAACwijACAACsIowAAACrCCMAAMAqwggAALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArCKMAAAAqwgjAADAKsIIAACwijACAACsIowAAACrCCMAAMAqwggAALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArCKMAAAAqwgjAADAKsIIAACwynEYqa+v1/Lly5WSkiKXy6U33nhjxDl1dXXKzs5WbGysZs+erYqKinDWCgAAIpDjMHLx4kXNmzdPr7zyyqjGt7a2Kj8/XwsXLlRTU5OKi4u1ceNG+Xw+x4sFAACRJ8rphGXLlmnZsmWjHl9RUaEZM2aorKxMkpSZmamGhgaVlpZqxYoVTg8PAAAizA2/Z+TYsWPKy8sL6Vu6dKkaGhrU19c36JxAIKCenp6QBgAAIpPjKyNOdXZ2KikpKaQvKSlJly5d0vnz5+XxeAbMKSkp0fPPP3+jlyZJmvXMwTHZz7tbHxmT/Ux2Y/XzGI3R/MzGcz3jaTzPa6yONZl/Vyfiud9sP3fc3Mbl0zQulytk2xgzaP9VRUVF6u7uDrb29vYbvkYAAGDHDb8ykpycrM7OzpC+rq4uRUVFafr06YPOcbvdcrvdN3ppAADgJnDDr4x4vV5VV1eH9B05ckQ5OTmKjo6+0YcHAAA3Ocdh5MKFC2publZzc7OkKx/dbW5uVltbm6Qrb7GsXr06OL6goEDnzp1TYWGhTp8+rZ07d6qyslKbN28emzMAAAATmuO3aRoaGvTQQw8FtwsLCyVJa9asUVVVlfx+fzCYSFJ6eroOHTqkTZs26dVXX1VKSoq2bdvGx3oBAICkMMLIkiVLgjegDqaqqmpA3+LFi9XY2Oj0UAAAYBLgu2kAAIBVhBEAAGAVYQQAAFhFGAEAAFYRRgAAgFWEEQAAYBVhBAAAWEUYAQAAVhFGAACAVYQRAABgFWEEAABYRRgBAABWEUYAAIBVhBEAAGAVYQQAAFhFGAEAAFYRRgAAgFWEEQAAYBVhBAAAWEUYAQAAVhFGAACAVYQRAABgFWEEAABYRRgBAABWEUYAAIBVhBEAAGAVYQQAAFhFGAEAAFYRRgAAgFWEEQAAYBVhBAAAWEUYAQAAVhFGAACAVWGFke3btys9PV2xsbHKzs7W0aNHhxxbW1srl8s1oJ05cybsRQMAgMjhOIy8/vrreuqpp/Tss8+qqalJCxcu1LJly9TW1jbsvJaWFvn9/mC75557wl40AACIHI7DyEsvvaTvfOc7Wrt2rTIzM1VWVqa0tDSVl5cPOy8xMVHJycnBNmXKlLAXDQAAIoejMNLb26sTJ04oLy8vpD8vL09vvfXWsHPvv/9+eTwe5ebmqq
amZtixgUBAPT09IQ0AAEQmR2Hk/Pnzunz5spKSkkL6k5KS1NnZOegcj8ejHTt2yOfzad++fcrIyFBubq7q6+uHPE5JSYkSEhKCLS0tzckyAQDABBIVziSXyxWybYwZ0HdVRkaGMjIygtter1ft7e0qLS3VokWLBp1TVFSkwsLC4HZPTw+BBACACOXoysidd96pKVOmDLgK0tXVNeBqyXDmz5+vs2fPDvm42+1WfHx8SAMAAJHJURiJiYlRdna2qqurQ/qrq6v14IMPjno/TU1N8ng8Tg4NAAAilOO3aQoLC7Vq1Srl5OTI6/Vqx44damtrU0FBgaQrb7F0dHRo9+7dkqSysjLNmjVLWVlZ6u3t1Z49e+Tz+eTz+cb2TAAAwITkOIysXLlSf/3rX/XCCy/I7/drzpw5OnTokGbOnClJ8vv9IX9zpLe3V5s3b1ZHR4emTp2qrKwsHTx4UPn5+WN3FgAAYMIK6wbWJ598Uk8++eSgj1VVVYVsb9myRVu2bAnnMAAAYBLgu2kAAIBVhBEAAGAVYQQAAFhFGAEAAFYRRgAAgFWEEQAAYBVhBAAAWEUYAQAAVhFGAACAVYQRAABgFWEEAABYRRgBAABWEUYAAIBVhBEAAGAVYQQAAFhFGAEAAFYRRgAAgFWEEQAAYBVhBAAAWEUYAQAAVhFGAACAVYQRAABgFWEEAABYRRgBAABWEUYAAIBVhBEAAGAVYQQAAFhFGAEAAFYRRgAAgFWEEQAAYBVhBAAAWEUYAQAAVhFGAACAVWGFke3btys9PV2xsbHKzs7W0aNHhx1fV1en7OxsxcbGavbs2aqoqAhrsQAAIPI4DiOvv/66nnrqKT377LNqamrSwoULtWzZMrW1tQ06vrW1Vfn5+Vq4cKGamppUXFysjRs3yufzXffiAQDAxOc4jLz00kv6zne+o7Vr1yozM1NlZWVKS0tTeXn5oOMrKio0Y8YMlZWVKTMzU2vXrtUTTzyh0tLS6148AACY+KKcDO7t7dWJEyf0zDPPhPTn5eXprbfeGnTOsWPHlJeXF9K3dOlSVVZWqq+vT9HR0QPmBAIBBQKB4HZ3d7ckqaenx8lyR6U/8Pcx2c9YrW0067kRdbhZjNXPYzRGU8ebbT2jMZ5rnogmc53H6jl/s/3ujKfx/Pd3PH8WN+q8ru7XGDP8QONAR0eHkWR+97vfhfS/+OKL5tOf/vSgc+655x7z4osvhvT97ne/M5LMe++9N+ic5557zkii0Wg0Go0WAa29vX3YfOHoyshVLpcrZNsYM6BvpPGD9V9VVFSkwsLC4HZ/f7/+9re/afr06cMeZ6z19PQoLS1N7e3tio+PH7fjRgJqFx7qFh7qFj5qFx7qNjrGGH344YdKSUkZdpyjMHLnnXdqypQp6uzsDOnv6upSUlLSoHOSk5MHHR8VFaXp06cPOsftdsvtdof03X777U6WOqbi4+N5soWJ2oWHuoWHuoWP2oWHuo0sISFhxDGObmCNiYlRdna2qqurQ/qrq6v14IMPDjrH6/UOGH/kyBHl5OQMer8IAACYXBx/mqawsFA/+clPtHPnTp0+fVqbNm1SW1ubCgoKJF15i2X16tXB8QUFBTp37pwKCwt1+vRp7dy5U5WVldq8efPYnQUAAJiwHN8zsnLlSv31r3/VCy+8IL/frzlz5ujQoUOaOXOmJMnv94f8zZH09HQdOnRImzZt0quvvqqUlBRt27ZNK1asGLuzuEHcbreee+65AW8ZYWTULjzULTzULXzULjzUbWy5jBnp8zYAAAA3Dt9NAwAArCKMAAAAqwgjAADAKsIIAACwatKFkZKSEj3wwAOKi4tTYmKiHn/8cbW0tAw5/p//+Z/lcrlUVlYW0h8IBLRhwwbdeeeduvXWW/Xoo4/qL3/5yw1evT2jrdvp06f16KOPKiEhQXFxcZo/f37Ip6smW92k0dXuwoULWr9+vVJTUzV16lRlZmYO+PLJyVa78vJyzZ07N/hHpbxer9
58883g48YY/du//ZtSUlI0depULVmyRH/4wx9C9jHZanbVcLXr6+vTD37wA332s5/VrbfeqpSUFK1evVrvvfdeyD4mY+1Ges59Eq8NY2xUX0oTQZYuXWp27dpl3nnnHdPc3GweeeQRM2PGDHPhwoUBY/fv32/mzZtnUlJSzI9+9KOQxwoKCsynPvUpU11dbRobG81DDz1k5s2bZy5dujROZzK+RlO3P//5z2batGnm6aefNo2Njea///u/zS9/+Uvz/vvvB8dMtroZM7rarV271vzDP/yDqampMa2trea1114zU6ZMMW+88UZwzGSr3YEDB8zBgwdNS0uLaWlpMcXFxSY6Otq88847xhhjtm7dauLi4ozP5zOnTp0yK1euNB6Px/T09AT3MdlqdtVwtfvggw/Mww8/bF5//XVz5swZc+zYMfO5z33OZGdnh+xjMtZupOfcVbw2jL1JF0au1dXVZSSZurq6kP6//OUv5lOf+pR55513zMyZM0OecB988IGJjo42e/fuDfZ1dHSYW265xRw+fHi8lm7VYHVbuXKl+eY3vznkHOp2xWC1y8rKMi+88ELIuH/8x380//qv/2qMoXZX3XHHHeYnP/mJ6e/vN8nJyWbr1q3Bxz7++GOTkJBgKioqjDHU7FpXazeY3//+90aSOXfunDGG2n3StXXjteHGmHRv01yru7tbkjRt2rRgX39/v1atWqWnn35aWVlZA+acOHFCfX19ysvLC/alpKRozpw5euutt278om8C19atv79fBw8e1Kc//WktXbpUiYmJ+tznPqc33ngjOIe6XTHYc27BggU6cOCAOjo6ZIxRTU2N/vSnP2np0qWSqN3ly5e1d+9eXbx4UV6vV62trers7Ayph9vt1uLFi4P1mOw1u+ra2g2mu7tbLpcr+B1g1G7wuvHacONM6jBijFFhYaEWLFigOXPmBPv//d//XVFRUdq4ceOg8zo7OxUTE6M77rgjpD8pKWnAlwJGosHq1tXVpQsXLmjr1q364he/qCNHjuhLX/qSvvzlL6uurk4SdZOGfs5t27ZN9957r1JTUxUTE6MvfvGL2r59uxYsWCBp8tbu1KlTuu222+R2u1VQUKD9+/fr3nvvDZ7ztV/Q+cl6TNaaXTVU7a718ccf65lnntHXv/714Be+TebaDVc3XhtuHMd/Dj6SrF+/XidPntRvf/vbYN+JEyf08ssvq7GxUS6Xy9H+jDGO50xEg9Wtv79fkvTYY49p06ZNkqT77rtPb731lioqKrR48eIh9zdZ6iYNXjvpShg5fvy4Dhw4oJkzZ6q+vl5PPvmkPB6PHn744SH3F+m1y8jIUHNzsz744AP5fD6tWbMmGG4lDTj30dQj0mt21VC1+2Qg6evr01e/+lX19/dr+/btI+5zMtRuqLp99NFHvDbcQJP2ysiGDRt04MAB1dTUKDU1Ndh/9OhRdXV1acaMGYqKilJUVJTOnTunf/mXf9GsWbMkScnJyert7dX//u//huyzq6trwP/UIs1QdbvzzjsVFRU14H9emZmZwU/TTOa6SUPX7qOPPlJxcbFeeuklLV++XHPnztX69eu1cuVKlZaWSpq8tYuJidHdd9+tnJwclZSUaN68eXr55ZeVnJwsSQP+t/nJekzWml01VO2u6uvr01e+8hW1traquro6eFVEmty1G6puvDbcWJMujBhjtH79eu3bt0+/+c1vlJ6eHvL4qlWrdPLkSTU3NwdbSkqKnn76af3qV7+SJGVnZys6OlrV1dXBeX6/X++8844efPDBcT2f8TJS3WJiYvTAAw8M+Mjqn/70p+CXKE7Gukkj166vr099fX265ZbQX8cpU6YErzhN1tpdyxijQCCg9PR0JScnh9Sjt7dXdXV1wXpQs1BXayf9fxA5e/asfv3rX2v69OkhY6nd/7taN14bbrBxv2XWsu9973smISHB1NbWGr/fH2x///vfh5xz7R3Txlz5+FZqaqr59a9/bRobG83nP/
/5iP741mjqtm/fPhMdHW127Nhhzp49a3784x+bKVOmmKNHjwbHTLa6GTO62i1evNhkZWWZmpoa8z//8z9m165dJjY21mzfvj04ZrLVrqioyNTX15vW1lZz8uRJU1xcbG655RZz5MgRY8yVj/YmJCSYffv2mVOnTpmvfe1rg360dzLV7KrhatfX12ceffRRk5qaapqbm0Oek4FAILiPyVi7kZ5z1+K1YexMujAiadC2a9euIecM9oT76KOPzPr16820adPM1KlTzT/90z+Ztra2G7t4i0Zbt8rKSnP33Xeb2NhYM2/evJC/k2HM5KubMaOrnd/vN9/61rdMSkqKiY2NNRkZGeY//uM/TH9/f3DMZKvdE088YWbOnGliYmLMXXfdZXJzc0NeFPr7+81zzz1nkpOTjdvtNosWLTKnTp0K2cdkq9lVw9WutbV1yOdkTU1NcB+TsXYjPeeuxWvD2HEZY8z4XYcBAAAINenuGQEAADcXwggAALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArCKMAAAAqwgjAADAKsIIAACwijACAACsIowAAACr/g9F6UqqHOUOLAAAAABJRU5ErkJggg==\n", + "text/plain": [ + "<Figure size 640x480 with 1 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig, ax = plt.subplots()\n", + "ax.hist(slim[slim[\"query\"] == 142][\"bit score\"], bins=50);" + ] + }, { "cell_type": "code", "execution_count": 37, diff --git a/analysis/suppl-horizontal_gene_transfer.ipynb b/analysis/suppl-horizontal_gene_transfer.ipynb index d6a508d..a074e64 100644 --- a/analysis/suppl-horizontal_gene_transfer.ipynb +++ b/analysis/suppl-horizontal_gene_transfer.ipynb @@ -677,7 +677,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.10.8" } }, "nbformat": 4, -- GitLab