From c29d13dec60e336d5f034e07836c237fed70108b Mon Sep 17 00:00:00 2001
From: Niko Papadopoulos <nikolaos.papadopoulos@embl.de>
Date: Tue, 24 Jan 2023 16:52:20 +0100
Subject: [PATCH] looked at EC of second-best morphologs and why they sometimes
 disagree; looked at available choanos for HGT candidates

---
 analysis/revision-hgt-outgroup.ipynb          | 284 ++++++++++
 .../revision-second_best_morpholog-run.ipynb  | 529 +++++++++++++++++-
 analysis/suppl-horizontal_gene_transfer.ipynb |   2 +-
 3 files changed, 808 insertions(+), 7 deletions(-)
 create mode 100644 analysis/revision-hgt-outgroup.ipynb

diff --git a/analysis/revision-hgt-outgroup.ipynb b/analysis/revision-hgt-outgroup.ipynb
new file mode 100644
index 0000000..0856d47
--- /dev/null
+++ b/analysis/revision-hgt-outgroup.ipynb
@@ -0,0 +1,284 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "19a33b01-d8d4-49de-9e05-302c14fb7c42",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2023-01-24 16:43\n"
+     ]
+    }
+   ],
+   "source": [
+    "from datetime import datetime, timezone\n",
+    "import pandas as pd\n",
+    "import pytz\n",
+    "\n",
+    "utc_dt = datetime.now(timezone.utc) # UTC time\n",
+    "dt = utc_dt.astimezone()\n",
+    "tz = pytz.timezone('Europe/Berlin')\n",
+    "berlin_now = datetime.now(tz)\n",
+    "print(f'{berlin_now:%Y-%m-%d %H:%M}')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "55f808fc-0a34-48d5-96b9-659d27f16f13",
+   "metadata": {},
+   "source": [
+    "The reviewers challenged us to look for the HGT candidates in the nearest non-metazoan outgroup, choanoflagellates. We are using _Salpingoeca rosetta_ and _Monosiga brevicollis_, two model choanoflagellates with publicly available genomes."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "c4ad4dc2-a115-4679-8947-c6ed78582bbd",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current\n",
+      "                                 Dload  Upload   Total   Spent    Left  Speed\n",
+      "100 9376k    0 9376k    0     0  1607k      0 --:--:--  0:00:05 --:--:-- 1472k\n",
+      "  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current\n",
+      "                                 Dload  Upload   Total   Spent    Left  Speed\n",
+      "100 6562k    0 6562k    0     0  2390k      0 --:--:--  0:00:02 --:--:-- 2394k\n"
+     ]
+    }
+   ],
+   "source": [
+    "!curl \"https://rest.uniprot.org/uniprotkb/stream?format=fasta&query=%28%28proteome%3AUP000007799%29%29\" -o salpingoeca.faa\n",
+    "!curl \"https://rest.uniprot.org/uniprotkb/stream?format=fasta&query=%28%28proteome%3AUP000001357%29%29\" -o monosiga.faa\n",
+    "!rm -rf tmp/"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "aae09518-9f38-4c3e-90e0-f8595589e748",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "%%bash --out salpingoeca.out --err salpingoeca.err\n",
+    "hgt_candidates=\"/Users/npapadop/Documents/data/coffe/hgt.pep\"\n",
+    "mmseqs easy-search ${hgt_candidates} \"./salpingoeca.faa\" salpingoeca.m8 tmp --search-type 2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "9f1aa456-1f6f-4523-b923-763b603b649c",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "%%bash --out monosiga.out --err monosiga.err\n",
+    "hgt_candidates=\"/Users/npapadop/Documents/data/coffe/hgt.pep\"\n",
+    "mmseqs easy-search ${hgt_candidates} \"./monosiga.faa\" monosiga.m8 tmp --search-type 2"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "80982650-856c-4b18-be0e-501b240533ab",
+   "metadata": {},
+   "source": [
+    "Let's have a look at the results:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "169716b0-3ce2-4eb7-8e53-915c99ec5122",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>0</th>\n",
+       "      <th>1</th>\n",
+       "      <th>2</th>\n",
+       "      <th>3</th>\n",
+       "      <th>4</th>\n",
+       "      <th>5</th>\n",
+       "      <th>6</th>\n",
+       "      <th>7</th>\n",
+       "      <th>8</th>\n",
+       "      <th>9</th>\n",
+       "      <th>10</th>\n",
+       "      <th>11</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>c103983_g1_i1_m.71422,</td>\n",
+       "      <td>A9V527</td>\n",
+       "      <td>0.251</td>\n",
+       "      <td>505</td>\n",
+       "      <td>370</td>\n",
+       "      <td>0</td>\n",
+       "      <td>65</td>\n",
+       "      <td>569</td>\n",
+       "      <td>66</td>\n",
+       "      <td>560</td>\n",
+       "      <td>3.201000e-14</td>\n",
+       "      <td>76</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                       0       1      2    3    4   5   6    7   8    9   \\\n",
+       "0  c103983_g1_i1_m.71422,  A9V527  0.251  505  370   0  65  569  66  560   \n",
+       "\n",
+       "             10  11  \n",
+       "0  3.201000e-14  76  "
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pd.read_csv(\"monosiga.m8\", sep=\"\\t\", header=None)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "8c324422-faed-4249-89da-cb1dafeac4d1",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>0</th>\n",
+       "      <th>1</th>\n",
+       "      <th>2</th>\n",
+       "      <th>3</th>\n",
+       "      <th>4</th>\n",
+       "      <th>5</th>\n",
+       "      <th>6</th>\n",
+       "      <th>7</th>\n",
+       "      <th>8</th>\n",
+       "      <th>9</th>\n",
+       "      <th>10</th>\n",
+       "      <th>11</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>c103983_g1_i1_m.71422,</td>\n",
+       "      <td>F2UGB0</td>\n",
+       "      <td>0.253</td>\n",
+       "      <td>490</td>\n",
+       "      <td>354</td>\n",
+       "      <td>0</td>\n",
+       "      <td>63</td>\n",
+       "      <td>552</td>\n",
+       "      <td>159</td>\n",
+       "      <td>633</td>\n",
+       "      <td>2.262000e-16</td>\n",
+       "      <td>83</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                       0       1      2    3    4   5   6    7    8    9   \\\n",
+       "0  c103983_g1_i1_m.71422,  F2UGB0  0.253  490  354   0  63  552  159  633   \n",
+       "\n",
+       "             10  11  \n",
+       "0  2.262000e-16  83  "
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pd.read_csv(\"salpingoeca.m8\", sep=\"\\t\", header=None)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8184c03c-4741-4499-a346-a31f4e4ed61c",
+   "metadata": {},
+   "source": [
+    "In both cases, the only relevant hit found is c103983_g1, the gene that EggNOG v5.0 identifies as \"metal-dependent hydrolase - Proteobacteria\" and that MorF putatively identifies as an aminohydrolase."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/analysis/revision-second_best_morpholog-run.ipynb b/analysis/revision-second_best_morpholog-run.ipynb
index 114266e..bc753c2 100644
--- a/analysis/revision-second_best_morpholog-run.ipynb
+++ b/analysis/revision-second_best_morpholog-run.ipynb
@@ -10,7 +10,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2022-12-21 15:28\n"
+      "2023-01-24 15:27\n"
      ]
     }
    ],
@@ -387,7 +387,7 @@
     {
      "data": {
       "text/plain": [
-       "<matplotlib.collections.PathCollection at 0x2cc05e740>"
+       "<matplotlib.collections.PathCollection at 0x2ceef8e80>"
       ]
      },
      "execution_count": 19,
@@ -729,7 +729,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 33,
+   "execution_count": 32,
    "id": "b7f15b68-ef6b-40ad-9368-f1d9a1220b2b",
    "metadata": {},
    "outputs": [
@@ -739,7 +739,7 @@
        "0.1088115396676074"
       ]
      },
-     "execution_count": 33,
+     "execution_count": 32,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -750,7 +750,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 38,
+   "execution_count": 33,
    "id": "db54d8dc-302f-4dc3-8abf-03c357796d49",
    "metadata": {},
    "outputs": [
@@ -760,7 +760,7 @@
        "0.07933521480087802"
       ]
      },
-     "execution_count": 38,
+     "execution_count": 33,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -769,6 +769,523 @@
     "np.sum(top10p_12[~exclude] < 3) / np.sum(top10p_12[~exclude] > 3)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "id": "d1c33949-76e8-4bdd-bbee-99824d43570a",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1.217579250720461"
+      ]
+     },
+     "execution_count": 36,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "np.mean(top10p_12[~exclude][top10p_12[~exclude] < 4])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "id": "f3d9b8ec-e7fe-4708-9e90-5377a59b34e7",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "query\n",
+       "132      0\n",
+       "142      3\n",
+       "170      0\n",
+       "215      0\n",
+       "252      1\n",
+       "        ..\n",
+       "41535    1\n",
+       "41596    0\n",
+       "41621    1\n",
+       "41703    3\n",
+       "41934    1\n",
+       "Length: 694, dtype: int64"
+      ]
+     },
+     "execution_count": 41,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "top10p_12[~exclude][top10p_12[~exclude] < 4]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 54,
+   "id": "614bc862-d3ae-45bb-bae4-0dd45e9ae3cf",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>query</th>\n",
+       "      <th>bit score</th>\n",
+       "      <th>uniprot</th>\n",
+       "      <th>eggNOG_OGs</th>\n",
+       "      <th>Preferred_name</th>\n",
+       "      <th>EC</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>1798577</th>\n",
+       "      <td>41596</td>\n",
+       "      <td>367</td>\n",
+       "      <td>Q22707</td>\n",
+       "      <td>COG5226@1|root,KOG2386@2759|Eukaryota,38WCH@33...</td>\n",
+       "      <td>DUSP11</td>\n",
+       "      <td>3.1.3.16,3.1.3.48</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1798580</th>\n",
+       "      <td>41596</td>\n",
+       "      <td>356</td>\n",
+       "      <td>C0PFH1</td>\n",
+       "      <td>COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33...</td>\n",
+       "      <td>-</td>\n",
+       "      <td>2.7.7.50</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1798581</th>\n",
+       "      <td>41596</td>\n",
+       "      <td>347</td>\n",
+       "      <td>B7EQL6</td>\n",
+       "      <td>COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33...</td>\n",
+       "      <td>-</td>\n",
+       "      <td>2.7.7.50</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1798582</th>\n",
+       "      <td>41596</td>\n",
+       "      <td>347</td>\n",
+       "      <td>A0A1D6EGX8</td>\n",
+       "      <td>COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33...</td>\n",
+       "      <td>-</td>\n",
+       "      <td>2.7.7.50</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1798583</th>\n",
+       "      <td>41596</td>\n",
+       "      <td>344</td>\n",
+       "      <td>A0A0H5S9H8</td>\n",
+       "      <td>COG5226@1|root,KOG2386@2759|Eukaryota,3A2XY@33...</td>\n",
+       "      <td>-</td>\n",
+       "      <td>3.1.3.16,3.1.3.48</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1798584</th>\n",
+       "      <td>41596</td>\n",
+       "      <td>343</td>\n",
+       "      <td>Q6NXK5</td>\n",
+       "      <td>COG5226@1|root,KOG2386@2759|Eukaryota,38WCH@33...</td>\n",
+       "      <td>DUSP11</td>\n",
+       "      <td>3.1.3.16,3.1.3.48</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1798585</th>\n",
+       "      <td>41596</td>\n",
+       "      <td>337</td>\n",
+       "      <td>Q2QWJ7</td>\n",
+       "      <td>COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33...</td>\n",
+       "      <td>-</td>\n",
+       "      <td>2.7.7.50</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1798588</th>\n",
+       "      <td>41596</td>\n",
+       "      <td>330</td>\n",
+       "      <td>O75319</td>\n",
+       "      <td>COG5226@1|root,KOG2386@2759|Eukaryota,38WCH@33...</td>\n",
+       "      <td>DUSP11</td>\n",
+       "      <td>3.1.3.16,3.1.3.48</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1798589</th>\n",
+       "      <td>41596</td>\n",
+       "      <td>328</td>\n",
+       "      <td>Q4KM79</td>\n",
+       "      <td>COG5226@1|root,KOG2386@2759|Eukaryota,38WCH@33...</td>\n",
+       "      <td>DUSP11</td>\n",
+       "      <td>3.1.3.16,3.1.3.48</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1798590</th>\n",
+       "      <td>41596</td>\n",
+       "      <td>323</td>\n",
+       "      <td>A0A3P7GI08</td>\n",
+       "      <td>COG5226@1|root,KOG2386@2759|Eukaryota,3A2XY@33...</td>\n",
+       "      <td>-</td>\n",
+       "      <td>3.1.3.16,3.1.3.48</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1798592</th>\n",
+       "      <td>41596</td>\n",
+       "      <td>319</td>\n",
+       "      <td>I1N9F9</td>\n",
+       "      <td>COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33...</td>\n",
+       "      <td>-</td>\n",
+       "      <td>2.7.7.50</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1798595</th>\n",
+       "      <td>41596</td>\n",
+       "      <td>296</td>\n",
+       "      <td>E9QD92</td>\n",
+       "      <td>COG5226@1|root,KOG2386@2759|Eukaryota,38WCH@33...</td>\n",
+       "      <td>DUSP11</td>\n",
+       "      <td>3.1.3.16,3.1.3.48</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1798596</th>\n",
+       "      <td>41596</td>\n",
+       "      <td>288</td>\n",
+       "      <td>Q8GSD7</td>\n",
+       "      <td>COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33...</td>\n",
+       "      <td>-</td>\n",
+       "      <td>2.7.7.50</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1798597</th>\n",
+       "      <td>41596</td>\n",
+       "      <td>286</td>\n",
+       "      <td>A0A0R0KQX0</td>\n",
+       "      <td>COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33...</td>\n",
+       "      <td>-</td>\n",
+       "      <td>2.7.7.50</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1798598</th>\n",
+       "      <td>41596</td>\n",
+       "      <td>276</td>\n",
+       "      <td>F4IYM6</td>\n",
+       "      <td>COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33...</td>\n",
+       "      <td>-</td>\n",
+       "      <td>2.7.7.50</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1798599</th>\n",
+       "      <td>41596</td>\n",
+       "      <td>273</td>\n",
+       "      <td>Q8SX38</td>\n",
+       "      <td>COG5226@1|root,KOG2386@2759|Eukaryota,38WCH@33...</td>\n",
+       "      <td>DUSP11</td>\n",
+       "      <td>3.1.3.16,3.1.3.48</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1798600</th>\n",
+       "      <td>41596</td>\n",
+       "      <td>272</td>\n",
+       "      <td>P34442</td>\n",
+       "      <td>COG5226@1|root,KOG2386@2759|Eukaryota,3A2XY@33...</td>\n",
+       "      <td>-</td>\n",
+       "      <td>3.1.3.16,3.1.3.48</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1798601</th>\n",
+       "      <td>41596</td>\n",
+       "      <td>270</td>\n",
+       "      <td>J9BD64</td>\n",
+       "      <td>COG5226@1|root,KOG2386@2759|Eukaryota,38WCH@33...</td>\n",
+       "      <td>DUSP11</td>\n",
+       "      <td>3.1.3.16,3.1.3.48</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1798602</th>\n",
+       "      <td>41596</td>\n",
+       "      <td>265</td>\n",
+       "      <td>K7MTE7</td>\n",
+       "      <td>COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33...</td>\n",
+       "      <td>-</td>\n",
+       "      <td>2.7.7.50</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1798603</th>\n",
+       "      <td>41596</td>\n",
+       "      <td>259</td>\n",
+       "      <td>A0A0N4U7Y1</td>\n",
+       "      <td>COG5226@1|root,KOG2386@2759|Eukaryota,3A2XY@33...</td>\n",
+       "      <td>-</td>\n",
+       "      <td>3.1.3.16,3.1.3.48</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1798604</th>\n",
+       "      <td>41596</td>\n",
+       "      <td>259</td>\n",
+       "      <td>K7KDH1</td>\n",
+       "      <td>COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33...</td>\n",
+       "      <td>-</td>\n",
+       "      <td>2.7.7.50</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1798606</th>\n",
+       "      <td>41596</td>\n",
+       "      <td>254</td>\n",
+       "      <td>Q6NY98</td>\n",
+       "      <td>COG5226@1|root,KOG2386@2759|Eukaryota,38DZV@33...</td>\n",
+       "      <td>RNGTT</td>\n",
+       "      <td>1.6.5.3,1.6.99.3,2.7.7.50</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1798607</th>\n",
+       "      <td>41596</td>\n",
+       "      <td>254</td>\n",
+       "      <td>I1KKA0</td>\n",
+       "      <td>COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33...</td>\n",
+       "      <td>-</td>\n",
+       "      <td>2.7.7.50</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1798608</th>\n",
+       "      <td>41596</td>\n",
+       "      <td>250</td>\n",
+       "      <td>K7K355</td>\n",
+       "      <td>COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33...</td>\n",
+       "      <td>-</td>\n",
+       "      <td>2.7.7.50</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1798609</th>\n",
+       "      <td>41596</td>\n",
+       "      <td>246</td>\n",
+       "      <td>O60942</td>\n",
+       "      <td>COG5226@1|root,KOG2386@2759|Eukaryota,38DZV@33...</td>\n",
+       "      <td>RNGTT</td>\n",
+       "      <td>1.6.5.3,1.6.99.3,2.7.7.50</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1798610</th>\n",
+       "      <td>41596</td>\n",
+       "      <td>238</td>\n",
+       "      <td>O55236</td>\n",
+       "      <td>COG5226@1|root,KOG2386@2759|Eukaryota,38DZV@33...</td>\n",
+       "      <td>RNGTT</td>\n",
+       "      <td>1.6.5.3,1.6.99.3,2.7.7.50</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1798611</th>\n",
+       "      <td>41596</td>\n",
+       "      <td>238</td>\n",
+       "      <td>Q9VY44</td>\n",
+       "      <td>COG5226@1|root,KOG2386@2759|Eukaryota,38DZV@33...</td>\n",
+       "      <td>RNGTT</td>\n",
+       "      <td>1.6.5.3,1.6.99.3,2.7.7.50</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1798612</th>\n",
+       "      <td>41596</td>\n",
+       "      <td>233</td>\n",
+       "      <td>A0A5K4FAB6</td>\n",
+       "      <td>COG5226@1|root,KOG2386@2759|Eukaryota,38DZV@33...</td>\n",
+       "      <td>RNGTT</td>\n",
+       "      <td>1.6.5.3,1.6.99.3,2.7.7.50</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1798614</th>\n",
+       "      <td>41596</td>\n",
+       "      <td>226</td>\n",
+       "      <td>Q17607</td>\n",
+       "      <td>COG5226@1|root,KOG2386@2759|Eukaryota,38DZV@33...</td>\n",
+       "      <td>RNGTT</td>\n",
+       "      <td>1.6.5.3,1.6.99.3,2.7.7.50</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1798615</th>\n",
+       "      <td>41596</td>\n",
+       "      <td>219</td>\n",
+       "      <td>D3ZH30</td>\n",
+       "      <td>COG5226@1|root,KOG2386@2759|Eukaryota,38DZV@33...</td>\n",
+       "      <td>RNGTT</td>\n",
+       "      <td>1.6.5.3,1.6.99.3,2.7.7.50</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1798616</th>\n",
+       "      <td>41596</td>\n",
+       "      <td>211</td>\n",
+       "      <td>A0A0N4UCR5</td>\n",
+       "      <td>COG5226@1|root,KOG2386@2759|Eukaryota,38DZV@33...</td>\n",
+       "      <td>RNGTT</td>\n",
+       "      <td>1.6.5.3,1.6.99.3,2.7.7.50</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1798618</th>\n",
+       "      <td>41596</td>\n",
+       "      <td>198</td>\n",
+       "      <td>A0A183XJR9</td>\n",
+       "      <td>COG5226@1|root,KOG2386@2759|Eukaryota,38DZV@33...</td>\n",
+       "      <td>RNGTT</td>\n",
+       "      <td>1.6.5.3,1.6.99.3,2.7.7.50</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "         query  bit score     uniprot  \\\n",
+       "1798577  41596        367      Q22707   \n",
+       "1798580  41596        356      C0PFH1   \n",
+       "1798581  41596        347      B7EQL6   \n",
+       "1798582  41596        347  A0A1D6EGX8   \n",
+       "1798583  41596        344  A0A0H5S9H8   \n",
+       "1798584  41596        343      Q6NXK5   \n",
+       "1798585  41596        337      Q2QWJ7   \n",
+       "1798588  41596        330      O75319   \n",
+       "1798589  41596        328      Q4KM79   \n",
+       "1798590  41596        323  A0A3P7GI08   \n",
+       "1798592  41596        319      I1N9F9   \n",
+       "1798595  41596        296      E9QD92   \n",
+       "1798596  41596        288      Q8GSD7   \n",
+       "1798597  41596        286  A0A0R0KQX0   \n",
+       "1798598  41596        276      F4IYM6   \n",
+       "1798599  41596        273      Q8SX38   \n",
+       "1798600  41596        272      P34442   \n",
+       "1798601  41596        270      J9BD64   \n",
+       "1798602  41596        265      K7MTE7   \n",
+       "1798603  41596        259  A0A0N4U7Y1   \n",
+       "1798604  41596        259      K7KDH1   \n",
+       "1798606  41596        254      Q6NY98   \n",
+       "1798607  41596        254      I1KKA0   \n",
+       "1798608  41596        250      K7K355   \n",
+       "1798609  41596        246      O60942   \n",
+       "1798610  41596        238      O55236   \n",
+       "1798611  41596        238      Q9VY44   \n",
+       "1798612  41596        233  A0A5K4FAB6   \n",
+       "1798614  41596        226      Q17607   \n",
+       "1798615  41596        219      D3ZH30   \n",
+       "1798616  41596        211  A0A0N4UCR5   \n",
+       "1798618  41596        198  A0A183XJR9   \n",
+       "\n",
+       "                                                eggNOG_OGs Preferred_name  \\\n",
+       "1798577  COG5226@1|root,KOG2386@2759|Eukaryota,38WCH@33...         DUSP11   \n",
+       "1798580  COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33...              -   \n",
+       "1798581  COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33...              -   \n",
+       "1798582  COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33...              -   \n",
+       "1798583  COG5226@1|root,KOG2386@2759|Eukaryota,3A2XY@33...              -   \n",
+       "1798584  COG5226@1|root,KOG2386@2759|Eukaryota,38WCH@33...         DUSP11   \n",
+       "1798585  COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33...              -   \n",
+       "1798588  COG5226@1|root,KOG2386@2759|Eukaryota,38WCH@33...         DUSP11   \n",
+       "1798589  COG5226@1|root,KOG2386@2759|Eukaryota,38WCH@33...         DUSP11   \n",
+       "1798590  COG5226@1|root,KOG2386@2759|Eukaryota,3A2XY@33...              -   \n",
+       "1798592  COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33...              -   \n",
+       "1798595  COG5226@1|root,KOG2386@2759|Eukaryota,38WCH@33...         DUSP11   \n",
+       "1798596  COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33...              -   \n",
+       "1798597  COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33...              -   \n",
+       "1798598  COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33...              -   \n",
+       "1798599  COG5226@1|root,KOG2386@2759|Eukaryota,38WCH@33...         DUSP11   \n",
+       "1798600  COG5226@1|root,KOG2386@2759|Eukaryota,3A2XY@33...              -   \n",
+       "1798601  COG5226@1|root,KOG2386@2759|Eukaryota,38WCH@33...         DUSP11   \n",
+       "1798602  COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33...              -   \n",
+       "1798603  COG5226@1|root,KOG2386@2759|Eukaryota,3A2XY@33...              -   \n",
+       "1798604  COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33...              -   \n",
+       "1798606  COG5226@1|root,KOG2386@2759|Eukaryota,38DZV@33...          RNGTT   \n",
+       "1798607  COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33...              -   \n",
+       "1798608  COG5226@1|root,KOG2386@2759|Eukaryota,37QNS@33...              -   \n",
+       "1798609  COG5226@1|root,KOG2386@2759|Eukaryota,38DZV@33...          RNGTT   \n",
+       "1798610  COG5226@1|root,KOG2386@2759|Eukaryota,38DZV@33...          RNGTT   \n",
+       "1798611  COG5226@1|root,KOG2386@2759|Eukaryota,38DZV@33...          RNGTT   \n",
+       "1798612  COG5226@1|root,KOG2386@2759|Eukaryota,38DZV@33...          RNGTT   \n",
+       "1798614  COG5226@1|root,KOG2386@2759|Eukaryota,38DZV@33...          RNGTT   \n",
+       "1798615  COG5226@1|root,KOG2386@2759|Eukaryota,38DZV@33...          RNGTT   \n",
+       "1798616  COG5226@1|root,KOG2386@2759|Eukaryota,38DZV@33...          RNGTT   \n",
+       "1798618  COG5226@1|root,KOG2386@2759|Eukaryota,38DZV@33...          RNGTT   \n",
+       "\n",
+       "                                EC  \n",
+       "1798577          3.1.3.16,3.1.3.48  \n",
+       "1798580                   2.7.7.50  \n",
+       "1798581                   2.7.7.50  \n",
+       "1798582                   2.7.7.50  \n",
+       "1798583          3.1.3.16,3.1.3.48  \n",
+       "1798584          3.1.3.16,3.1.3.48  \n",
+       "1798585                   2.7.7.50  \n",
+       "1798588          3.1.3.16,3.1.3.48  \n",
+       "1798589          3.1.3.16,3.1.3.48  \n",
+       "1798590          3.1.3.16,3.1.3.48  \n",
+       "1798592                   2.7.7.50  \n",
+       "1798595          3.1.3.16,3.1.3.48  \n",
+       "1798596                   2.7.7.50  \n",
+       "1798597                   2.7.7.50  \n",
+       "1798598                   2.7.7.50  \n",
+       "1798599          3.1.3.16,3.1.3.48  \n",
+       "1798600          3.1.3.16,3.1.3.48  \n",
+       "1798601          3.1.3.16,3.1.3.48  \n",
+       "1798602                   2.7.7.50  \n",
+       "1798603          3.1.3.16,3.1.3.48  \n",
+       "1798604                   2.7.7.50  \n",
+       "1798606  1.6.5.3,1.6.99.3,2.7.7.50  \n",
+       "1798607                   2.7.7.50  \n",
+       "1798608                   2.7.7.50  \n",
+       "1798609  1.6.5.3,1.6.99.3,2.7.7.50  \n",
+       "1798610  1.6.5.3,1.6.99.3,2.7.7.50  \n",
+       "1798611  1.6.5.3,1.6.99.3,2.7.7.50  \n",
+       "1798612  1.6.5.3,1.6.99.3,2.7.7.50  \n",
+       "1798614  1.6.5.3,1.6.99.3,2.7.7.50  \n",
+       "1798615  1.6.5.3,1.6.99.3,2.7.7.50  \n",
+       "1798616  1.6.5.3,1.6.99.3,2.7.7.50  \n",
+       "1798618  1.6.5.3,1.6.99.3,2.7.7.50  "
+      ]
+     },
+     "execution_count": 54,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "slim[slim[\"query\"] == 41596]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 49,
+   "id": "3ee9ee24-9274-403e-a269-d43b13c63573",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAiMAAAGdCAYAAADAAnMpAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8o6BhiAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAj1ElEQVR4nO3df1DU953H8ddGYHESINEEWAoqXlJKsJocZOpm/JWSYiVnktaZ2l9qm9o5Gn80csYGcjO5pJPBm+FSYhMhXlHOcVrzx2pqq7HSKT9so9MgMJpWqb0jQskSxvYC0SYLyuf+cNzLys/vinxkeT5mPn98P/v5fL+f75vFffnd77IuY4wRAACAJbfYXgAAAJjcCCMAAMAqwggAALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArIqyvYDR6O/v13vvvae4uDi5XC7bywEAAKNgjNGHH36olJQU3XLL0Nc/JkQYee+995SWlmZ7GQAAIAzt7e1KTU0d8vEJEUbi4uIkXTmZ+Ph4y6sBAACj0dPTo7S0tODr+FAmRBi5+tZMfHw8YQQAgAlmpFssuIEVAABYRRgBAABWEUYAAIBVhBEAAGAVYQQAAFhFGAEAAFYRRgAAgFWEEQAAYBVhBAAAWEUYAQAAVl1XGCkpKZHL5dJTTz017Li6ujplZ2crNjZWs2fPVkVFxfUcFgAARJCww8jbb7+tHTt2aO7cucOOa21tVX5+vhYuXKimpiYVFxdr48aN8vl84R4aAABEkLDCyIULF/SNb3xD//mf/6k77rhj2LEVFRWaMWOGysrKlJmZqbVr1+qJJ55QaWlpWAsGAACRJawwsm7dOj3yyCN6+OGHRxx77Ngx5eXlhfQtXbpUDQ0N6uvrG3ROIBBQT09PSAMAAJEpyumEvXv3qrGxUW+//faoxnd2diopKSmkLykpSZcuXdL58+fl8XgGzCkpKdHzzz/vdGnATWvWMwdHHPPu1kfGYSVjK1LPazxRQ8DhlZH29nZ9//vf1549exQbGzvqeS6XK2TbGDNo/1VFRUXq7u4Otvb2difLBAAAE4ijKyMnTpxQV1eXsrOzg32XL19WfX29XnnlFQUCAU2ZMiVkTnJysjo7O0P6urq6FBUVpenTpw96HLfbLbfb7WRpAABggnIURnJzc3Xq1KmQvm9/+9v6zGc+ox/84AcDgogkeb1e/eIXvwjpO3LkiHJychQdHR3GkgEAQCRxFEbi4uI0Z86ckL5bb71V06dPD/YXFRWpo6NDu3fvliQVFBTolVdeUWFhob773e/q2LFjqqys1M9+9rMxOgUAADCRjflfYPX7/Wprawtup6en69ChQ6qtrdV9992nH/7wh9q2bZtWrFgx1ocGAAATkONP01yrtrY2ZLuqqmrAmMWLF6uxsfF6DwUAACIQ300DAACsIowAAACrCCMAAMAqwggAALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArCKMAAAAqwgjAADAKsIIAACwijACAACsIowAAACrCCMAAMAqwggAALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArCKMAAAAqwgjAADAKsIIAACwijACAACsIowAAACrCCMAAMAqwggAALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArHIURsrLyzV37lzFx8crPj5eXq9Xb7755pDja2tr5XK5BrQzZ85c98IBAEBkiHIyODU1VVu3btXdd98tSfqv//ovPfbYY2pqalJWVtaQ81paWhQfHx/cvuuuu8JcLgAAiDSOwsjy5ctDtl988UWVl5fr+PHjw4aRxMRE3X777WEtEAAARLaw7xm5fPmy9u7dq4sXL8rr9Q479v7775fH41Fubq5qampG3HcgEFBPT09IAwAAkclxGDl16pRuu+02ud1uFRQUaP/+/br33nsHHevxeLRjxw75fD7t27dPGRkZys3
NVX19/bDHKCkpUUJCQrClpaU5XSYAAJggHL1NI0kZGRlqbm7WBx98IJ/PpzVr1qiurm7QQJKRkaGMjIzgttfrVXt7u0pLS7Vo0aIhj1FUVKTCwsLgdk9PD4EEAIAI5TiMxMTEBG9gzcnJ0dtvv62XX35Zr7322qjmz58/X3v27Bl2jNvtltvtdro0AAAwAV333xkxxigQCIx6fFNTkzwez/UeFgAARAhHV0aKi4u1bNkypaWl6cMPP9TevXtVW1urw4cPS7ry9kpHR4d2794tSSorK9OsWbOUlZWl3t5e7dmzRz6fTz6fb+zPBAAATEiOwsj777+vVatWye/3KyEhQXPnztXhw4f1hS98QZLk9/vV1tYWHN/b26vNmzero6NDU6dOVVZWlg4ePKj8/PyxPQsAADBhOQojlZWVwz5eVVUVsr1lyxZt2bLF8aIAAMDkwXfTAAAAqwgjAADAKsIIAACwijACAACsIowAAACrCCMAAMAqwggAALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArCKMAAAAqwgjAADAKsIIAACwijACAACsIowAAACrCCMAAMAqwggAALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArCKMAAAAqwgjAADAKsIIAACwijACAACsIowAAACrCCMAAMAqwggAALCKMAIAAKwijAAAAKschZHy8nLNnTtX8fHxio+Pl9fr1ZtvvjnsnLq6OmVnZys2NlazZ89WRUXFdS0YAABEFkdhJDU1VVu3blVDQ4MaGhr0+c9/Xo899pj+8Ic/DDq+tbVV+fn5WrhwoZqamlRcXKyNGzfK5/ONyeIBAMDEF+Vk8PLly0O2X3zxRZWXl+v48ePKysoaML6iokIzZsxQWVmZJCkzM1MNDQ0qLS3VihUrwl81AACIGGHfM3L58mXt3btXFy9elNfrHXTMsWPHlJeXF9K3dOlSNTQ0qK+vb8h9BwIB9fT0hDQAABCZHF0ZkaRTp07J6/Xq448/1m233ab9+/fr3nvvHXRsZ2enkpKSQvqSkpJ06dIlnT9/Xh6PZ9B5JSUlev75550uDQAAXGPWMwdHHPPu1kfGYSVDc3xlJCMjQ83NzTp+/Li+973vac2aNfrjH/845HiXyxWybYwZtP+TioqK1N3dHWzt7e1OlwkAACYIx1dGYmJidPfdd0uScnJy9Pbbb+vll1/Wa6+9NmBscnKyOjs7Q/q6uroUFRWl6dOnD3kMt9stt9vtdGkAAGACuu6/M2KMUSAQGPQxr9er6urqkL4jR44oJydH0dHR13toAAAQARyFkeLiYh09elTvvvuuTp06pWeffVa1tbX6xje+IenK2yurV68Oji8oKNC5c+dUWFio06dPa+fOnaqsrNTmzZvH9iwAAMCE5ehtmvfff1+rVq2S3+9XQkKC5s6dq8OHD+sLX/iCJMnv96utrS04Pj09XYcOHdKmTZv06quvKiUlRdu2beNjvQAAIMhRGKmsrBz28aqqqgF9ixcvVmNjo6NFAQCAyYPvpgEAAFYRRgAAgFWEEQAAYBVhBAAAWEUYAQAAVhFGAACAVYQRAABgFWEEAABYRRgBAABWEUYAAIBVhBEAAGAVYQQAAFhFGAEAAFYRRgAAgFWEEQAAYBVhBAAAWEUYAQAAVhFGAACAVYQRAABgFWEEAABYRRgBAABWEUYAAIBVhBEAAGAVYQQAAFhFGAEAAFYRRgAAgFWEEQAAYBVhBAAAWEUYAQAAVhFGAACAVYQRAABgFWEEAABYRRgBAABWOQojJSUleuCBBxQXF6fExEQ9/vjjamlpGXZObW2tXC7XgHbmzJnrWjgAAIgMjsJIXV2d1q1bp+PHj6u6ulqXLl1SXl6eLl68OOLclpYW+f3+YLvnnnvCXjQAAIgcUU4GHz58OGR7165dSkxM1IkTJ7Ro0aJh5yYmJur22293vEAAABDZruueke7ubknStGnTRhx7//33y+PxKDc
3VzU1NcOODQQC6unpCWkAACAyhR1GjDEqLCzUggULNGfOnCHHeTwe7dixQz6fT/v27VNGRoZyc3NVX18/5JySkhIlJCQEW1paWrjLBAAANzlHb9N80vr163Xy5En99re/HXZcRkaGMjIygtter1ft7e0qLS0d8q2doqIiFRYWBrd7enoIJAAARKiwroxs2LBBBw4cUE1NjVJTUx3Pnz9/vs6ePTvk4263W/Hx8SENAABEJkdXRowx2rBhg/bv36/a2lqlp6eHddCmpiZ5PJ6w5gIAgMjiKIysW7dOP/3pT/Xzn/9ccXFx6uzslCQlJCRo6tSpkq68xdLR0aHdu3dLksrKyjRr1ixlZWWpt7dXe/bskc/nk8/nG+NTAQAAE5GjMFJeXi5JWrJkSUj/rl279K1vfUuS5Pf71dbWFnyst7dXmzdvVkdHh6ZOnaqsrCwdPHhQ+fn517dyAAAQERy/TTOSqqqqkO0tW7Zoy5YtjhYFAAAmD76bBgAAWEUYAQAAVhFGAACAVYQRAABgFWEEAABYRRgBAABWEUYAAIBVhBEAAGAVYQQAAFhFGAEAAFYRRgAAgFWEEQAAYBVhBAAAWEUYAQAAVhFGAACAVYQRAABgFWEEAABYRRgBAABWEUYAAIBVhBEAAGAVYQQAAFhFGAEAAFYRRgAAgFWEEQAAYBVhBAAAWEUYAQAAVhFGAACAVYQRAABgFWEEAABYRRgBAABWEUYAAIBVhBEAAGAVYQQAAFjlKIyUlJTogQceUFxcnBITE/X444+rpaVlxHl1dXXKzs5WbGysZs+erYqKirAXDAAAIoujMFJXV6d169bp+PHjqq6u1qVLl5SXl6eLFy8OOae1tVX5+flauHChmpqaVFxcrI0bN8rn81334gEAwMQX5WTw4cOHQ7Z37dqlxMREnThxQosWLRp0TkVFhWbMmKGysjJJUmZmphoaGlRaWqoVK1aEt2oAABAxruueke7ubknStGnThhxz7Ngx5eXlhfQtXbpUDQ0N6uvrG3ROIBBQT09PSAMAAJHJ0ZWRTzLGqLCwUAsWLNCcOXOGHNfZ2amkpKSQvqSkJF26dEnnz5+Xx+MZMKekpETPP/98uEub0GY9c3DEMe9ufWQcVoKJaqyeQ6PZz1iZiM/78azPeBrPn8VEfK6O5/NwIv5ehCvsKyPr16/XyZMn9bOf/WzEsS6XK2TbGDNo/1VFRUXq7u4Otvb29nCXCQAAbnJhXRnZsGGDDhw4oPr6eqWmpg47Njk5WZ2dnSF9XV1dioqK0vTp0wed43a75Xa7w1kaAACYYBxdGTHGaP369dq3b59+85vfKD09fcQ5Xq9X1dXVIX1HjhxRTk6OoqOjna0WAABEHEdhZN26ddqzZ49++tOfKi4uTp2dners7NRHH30UHFNUVKTVq1cHtwsKCnTu3DkVFhbq9OnT2rlzpyorK7V58+axOwsAADBhOQoj5eXl6u7u1pIlS+TxeILt9ddfD47x+/1qa2sLbqenp+vQoUOqra3Vfffdpx/+8Ifatm0bH+sFAACSHN4zcvXG0+FUVVUN6Fu8eLEaGxudHAoAAEwSfDcNAACwijACAACsIowAAACrCCMAAMAqwggAALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArCKMAAAAqwgjAADAKsIIAACwijACAACsIowAAACrCCMAAMAqwggAALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArCKMAAAAqwgjAADAKsIIAACwijACAACsIowAAACrCCMAAMAqwggAALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsMpxGKmvr9fy5cuVkpIil8ulN954Y9jxtbW1crlcA9qZM2fCXTMAAIggUU4nXLx4UfPmzdO3v/1trVixYtTzWlpaFB8fH9y+6667nB4aAABEIMdhZNmyZVq2bJnjAyUmJur22293PA8AAES2cbtn5P7775fH41Fubq5qamq
GHRsIBNTT0xPSAABAZLrhYcTj8WjHjh3y+Xzat2+fMjIylJubq/r6+iHnlJSUKCEhIdjS0tJu9DIBAIAljt+mcSojI0MZGRnBba/Xq/b2dpWWlmrRokWDzikqKlJhYWFwu6enh0ACAECEsvLR3vnz5+vs2bNDPu52uxUfHx/SAABAZLISRpqamuTxeGwcGgAA3GQcv01z4cIF/fnPfw5ut7a2qrm5WdOmTdOMGTNUVFSkjo4O7d69W5JUVlamWbNmKSsrS729vdqzZ498Pp98Pt/YnQUAAJiwHIeRhoYGPfTQQ8Htq/d2rFmzRlVVVfL7/Wpraws+3tvbq82bN6ujo0NTp05VVlaWDh48qPz8/DFYPgAAmOgch5ElS5bIGDPk41VVVSHbW7Zs0ZYtWxwvDAAATA58Nw0AALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArCKMAAAAqwgjAADAKsIIAACwijACAACsIowAAACrCCMAAMAqwggAALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArCKMAAAAqwgjAADAKsIIAACwijACAACsIowAAACrCCMAAMAqwggAALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArCKMAAAAqwgjAADAKsIIAACwynEYqa+v1/Lly5WSkiKXy6U33nhjxDl1dXXKzs5WbGysZs+erYqKinDWCgAAIpDjMHLx4kXNmzdPr7zyyqjGt7a2Kj8/XwsXLlRTU5OKi4u1ceNG+Xw+x4sFAACRJ8rphGXLlmnZsmWjHl9RUaEZM2aorKxMkpSZmamGhgaVlpZqxYoVTg8PAAAizA2/Z+TYsWPKy8sL6Vu6dKkaGhrU19c36JxAIKCenp6QBgAAIpPjKyNOdXZ2KikpKaQvKSlJly5d0vnz5+XxeAbMKSkp0fPPP3+jlyZJmvXMwTHZz7tbHxmT/Ux2Y/XzGI3R/MzGcz3jaTzPa6yONZl/Vyfiud9sP3fc3Mbl0zQulytk2xgzaP9VRUVF6u7uDrb29vYbvkYAAGDHDb8ykpycrM7OzpC+rq4uRUVFafr06YPOcbvdcrvdN3ppAADgJnDDr4x4vV5VV1eH9B05ckQ5OTmKjo6+0YcHAAA3Ocdh5MKFC2publZzc7OkKx/dbW5uVltbm6Qrb7GsXr06OL6goEDnzp1TYWGhTp8+rZ07d6qyslKbN28emzMAAAATmuO3aRoaGvTQQw8FtwsLCyVJa9asUVVVlfx+fzCYSFJ6eroOHTqkTZs26dVXX1VKSoq2bdvGx3oBAICkMMLIkiVLgjegDqaqqmpA3+LFi9XY2Oj0UAAAYBLgu2kAAIBVhBEAAGAVYQQAAFhFGAEAAFYRRgAAgFWEEQAAYBVhBAAAWEUYAQAAVhFGAACAVYQRAABgFWEEAABYRRgBAABWEUYAAIBVhBEAAGAVYQQAAFhFGAEAAFYRRgAAgFWEEQAAYBVhBAAAWEUYAQAAVhFGAACAVYQRAABgFWEEAABYRRgBAABWEUYAAIBVhBEAAGAVYQQAAFhFGAEAAFYRRgAAgFWEEQAAYBVhBAAAWEUYAQAAVhFGAACAVWGFke3btys9PV2xsbHKzs7W0aNHhxxbW1srl8s1oJ05cybsRQMAgMjhOIy8/vrreuqpp/Tss8+qqalJCxcu1LJly9TW1jbsvJaWFvn9/mC75557wl40AACIHI7DyEsvvaTvfOc7Wrt2rTIzM1VWVqa0tDSVl5cPOy8xMVHJycnBNmXKlLAXDQAAIoejMNLb26sTJ04oLy8vpD8vL09vvfXWsHPvv/9+eTwe5ebmqqamZtixgUBAPT09IQ0AAEQmR2Hk/Pnzunz5spKSkkL6k5KS1NnZOegcj8ejHTt2yOfzad++fcrIyFBubq7q6+uHPE5JSYkSEhKCLS0tzckyAQDABBIVziSXyxWybYwZ0HdVRkaGMjIygtter1ft7e0qLS3VokWLBp1
TVFSkwsLC4HZPTw+BBACACOXoysidd96pKVOmDLgK0tXVNeBqyXDmz5+vs2fPDvm42+1WfHx8SAMAAJHJURiJiYlRdna2qqurQ/qrq6v14IMPjno/TU1N8ng8Tg4NAAAilOO3aQoLC7Vq1Srl5OTI6/Vqx44damtrU0FBgaQrb7F0dHRo9+7dkqSysjLNmjVLWVlZ6u3t1Z49e+Tz+eTz+cb2TAAAwITkOIysXLlSf/3rX/XCCy/I7/drzpw5OnTokGbOnClJ8vv9IX9zpLe3V5s3b1ZHR4emTp2qrKwsHTx4UPn5+WN3FgAAYMIK6wbWJ598Uk8++eSgj1VVVYVsb9myRVu2bAnnMAAAYBLgu2kAAIBVhBEAAGAVYQQAAFhFGAEAAFYRRgAAgFWEEQAAYBVhBAAAWEUYAQAAVhFGAACAVYQRAABgFWEEAABYRRgBAABWEUYAAIBVhBEAAGAVYQQAAFhFGAEAAFYRRgAAgFWEEQAAYBVhBAAAWEUYAQAAVhFGAACAVYQRAABgFWEEAABYRRgBAABWEUYAAIBVhBEAAGAVYQQAAFhFGAEAAFYRRgAAgFWEEQAAYBVhBAAAWEUYAQAAVhFGAACAVWGFke3btys9PV2xsbHKzs7W0aNHhx1fV1en7OxsxcbGavbs2aqoqAhrsQAAIPI4DiOvv/66nnrqKT377LNqamrSwoULtWzZMrW1tQ06vrW1Vfn5+Vq4cKGamppUXFysjRs3yufzXffiAQDAxOc4jLz00kv6zne+o7Vr1yozM1NlZWVKS0tTeXn5oOMrKio0Y8YMlZWVKTMzU2vXrtUTTzyh0tLS6148AACY+KKcDO7t7dWJEyf0zDPPhPTn5eXprbfeGnTOsWPHlJeXF9K3dOlSVVZWqq+vT9HR0QPmBAIBBQKB4HZ3d7ckqaenx8lyR6U/8Pcx2c9YrW0067kRdbhZjNXPYzRGU8ebbT2jMZ5rnogmc53H6jl/s/3ujKfx/Pd3PH8WN+q8ru7XGDP8QONAR0eHkWR+97vfhfS/+OKL5tOf/vSgc+655x7z4osvhvT97ne/M5LMe++9N+ic5557zkii0Wg0Go0WAa29vX3YfOHoyshVLpcrZNsYM6BvpPGD9V9VVFSkwsLC4HZ/f7/+9re/afr06cMeZ6z19PQoLS1N7e3tio+PH7fjRgJqFx7qFh7qFj5qFx7qNjrGGH344YdKSUkZdpyjMHLnnXdqypQp6uzsDOnv6upSUlLSoHOSk5MHHR8VFaXp06cPOsftdsvtdof03X777U6WOqbi4+N5soWJ2oWHuoWHuoWP2oWHuo0sISFhxDGObmCNiYlRdna2qqurQ/qrq6v14IMPDjrH6/UOGH/kyBHl5OQMer8IAACYXBx/mqawsFA/+clPtHPnTp0+fVqbNm1SW1ubCgoKJF15i2X16tXB8QUFBTp37pwKCwt1+vRp7dy5U5WVldq8efPYnQUAAJiwHN8zsnLlSv31r3/VCy+8IL/frzlz5ujQoUOaOXOmJMnv94f8zZH09HQdOnRImzZt0quvvqqUlBRt27ZNK1asGLuzuEHcbreee+65AW8ZYWTULjzULTzULXzULjzUbWy5jBnp8zYAAAA3Dt9NAwAArCKMAAAAqwgjAADAKsIIAACwatKFkZKSEj3wwAOKi4tTYmKiHn/8cbW0tAw5/p//+Z/lcrlUVlYW0h8IBLRhwwbdeeeduvXWW/Xoo4/qL3/5yw1evT2jrdvp06f16KOPKiEhQXFxcZo/f37Ip6smW92k0dXuwoULWr9+vVJTUzV16lRlZmYO+PLJyVa78vJyzZ07N/hHpbxer958883g48YY/du//ZtSUlI0depULVmyRH/4wx9C9jHZanbVcLXr6+vTD37wA332s5/VrbfeqpSUFK1evVrvvfdeyD4mY+1Ges59Eq8NY2xUX0oTQZYuXWp27dpl3nnnHdPc3GweeeQRM2PGDHPhwoUBY/fv32/mzZt
nUlJSzI9+9KOQxwoKCsynPvUpU11dbRobG81DDz1k5s2bZy5dujROZzK+RlO3P//5z2batGnm6aefNo2Njea///u/zS9/+Uvz/vvvB8dMtroZM7rarV271vzDP/yDqampMa2trea1114zU6ZMMW+88UZwzGSr3YEDB8zBgwdNS0uLaWlpMcXFxSY6Otq88847xhhjtm7dauLi4ozP5zOnTp0yK1euNB6Px/T09AT3MdlqdtVwtfvggw/Mww8/bF5//XVz5swZc+zYMfO5z33OZGdnh+xjMtZupOfcVbw2jL1JF0au1dXVZSSZurq6kP6//OUv5lOf+pR55513zMyZM0OecB988IGJjo42e/fuDfZ1dHSYW265xRw+fHi8lm7VYHVbuXKl+eY3vznkHOp2xWC1y8rKMi+88ELIuH/8x380//qv/2qMoXZX3XHHHeYnP/mJ6e/vN8nJyWbr1q3Bxz7++GOTkJBgKioqjDHU7FpXazeY3//+90aSOXfunDGG2n3StXXjteHGmHRv01yru7tbkjRt2rRgX39/v1atWqWnn35aWVlZA+acOHFCfX19ysvLC/alpKRozpw5euutt278om8C19atv79fBw8e1Kc//WktXbpUiYmJ+tznPqc33ngjOIe6XTHYc27BggU6cOCAOjo6ZIxRTU2N/vSnP2np0qWSqN3ly5e1d+9eXbx4UV6vV62trers7Ayph9vt1uLFi4P1mOw1u+ra2g2mu7tbLpcr+B1g1G7wuvHacONM6jBijFFhYaEWLFigOXPmBPv//d//XVFRUdq4ceOg8zo7OxUTE6M77rgjpD8pKWnAlwJGosHq1tXVpQsXLmjr1q364he/qCNHjuhLX/qSvvzlL6uurk4SdZOGfs5t27ZN9957r1JTUxUTE6MvfvGL2r59uxYsWCBp8tbu1KlTuu222+R2u1VQUKD9+/fr3nvvDZ7ztV/Q+cl6TNaaXTVU7a718ccf65lnntHXv/714Be+TebaDVc3XhtuHMd/Dj6SrF+/XidPntRvf/vbYN+JEyf08ssvq7GxUS6Xy9H+jDGO50xEg9Wtv79fkvTYY49p06ZNkqT77rtPb731lioqKrR48eIh9zdZ6iYNXjvpShg5fvy4Dhw4oJkzZ6q+vl5PPvmkPB6PHn744SH3F+m1y8jIUHNzsz744AP5fD6tWbMmGG4lDTj30dQj0mt21VC1+2Qg6evr01e/+lX19/dr+/btI+5zMtRuqLp99NFHvDbcQJP2ysiGDRt04MAB1dTUKDU1Ndh/9OhRdXV1acaMGYqKilJUVJTOnTunf/mXf9GsWbMkScnJyert7dX//u//huyzq6trwP/UIs1QdbvzzjsVFRU14H9emZmZwU/TTOa6SUPX7qOPPlJxcbFeeuklLV++XHPnztX69eu1cuVKlZaWSpq8tYuJidHdd9+tnJwclZSUaN68eXr55ZeVnJwsSQP+t/nJekzWml01VO2u6uvr01e+8hW1traquro6eFVEmty1G6puvDbcWJMujBhjtH79eu3bt0+/+c1vlJ6eHvL4qlWrdPLkSTU3NwdbSkqKnn76af3qV7+SJGVnZys6OlrV1dXBeX6/X++8844efPDBcT2f8TJS3WJiYvTAAw8M+Mjqn/70p+CXKE7Gukkj166vr099fX265ZbQX8cpU6YErzhN1tpdyxijQCCg9PR0JScnh9Sjt7dXdXV1wXpQs1BXayf9fxA5e/asfv3rX2v69OkhY6nd/7taN14bbrBxv2XWsu9973smISHB1NbWGr/fH2x///vfh5xz7R3Txlz5+FZqaqr59a9/bRobG83nP//5iP741mjqtm/fPhMdHW127Nhhzp49a3784x+bKVOmmKNHjwbHTLa6GTO62i1evNhkZWWZmpoa8z//8z9m165dJjY21mzfvj04ZrLVrqioyNTX15vW1lZz8uRJU1xcbG655RZz5MgRY8yVj/YmJCSYffv2mVOnTpm
vfe1rg360dzLV7KrhatfX12ceffRRk5qaapqbm0Oek4FAILiPyVi7kZ5z1+K1YexMujAiadC2a9euIecM9oT76KOPzPr16820adPM1KlTzT/90z+Ztra2G7t4i0Zbt8rKSnP33Xeb2NhYM2/evJC/k2HM5KubMaOrnd/vN9/61rdMSkqKiY2NNRkZGeY//uM/TH9/f3DMZKvdE088YWbOnGliYmLMXXfdZXJzc0NeFPr7+81zzz1nkpOTjdvtNosWLTKnTp0K2cdkq9lVw9WutbV1yOdkTU1NcB+TsXYjPeeuxWvD2HEZY8z4XYcBAAAINenuGQEAADcXwggAALCKMAIAAKwijAAAAKsIIwAAwCrCCAAAsIowAgAArCKMAAAAqwgjAADAKsIIAACwijACAACsIowAAACr/g9F6UqqHOUOLAAAAABJRU5ErkJggg==\n",
+      "text/plain": [
+       "<Figure size 640x480 with 1 Axes>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "fig, ax = plt.subplots()\n",
+    "ax.hist(slim[slim[\"query\"] == 142][\"bit score\"], bins=50);"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 37,
diff --git a/analysis/suppl-horizontal_gene_transfer.ipynb b/analysis/suppl-horizontal_gene_transfer.ipynb
index d6a508d..a074e64 100644
--- a/analysis/suppl-horizontal_gene_transfer.ipynb
+++ b/analysis/suppl-horizontal_gene_transfer.ipynb
@@ -677,7 +677,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.6"
+   "version": "3.10.8"
   }
  },
  "nbformat": 4,
-- 
GitLab