{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "19a33b01-d8d4-49de-9e05-302c14fb7c42",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2023-01-25 21:33\n"
     ]
    }
   ],
   "source": [
    "from datetime import datetime, timezone\n",
    "import pandas as pd\n",
    "import pytz\n",
    "\n",
    "# Print the current wall-clock time in Berlin as a run timestamp for this notebook.\n",
    "tz = pytz.timezone('Europe/Berlin')\n",
    "# datetime.now(tz) returns an aware datetime already expressed in the given zone.\n",
    "berlin_now = datetime.now(tz)\n",
    "print(f'{berlin_now:%Y-%m-%d %H:%M}')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "55f808fc-0a34-48d5-96b9-659d27f16f13",
   "metadata": {},
   "source": [
    "The reviewers challenged us to look for the HGT candidates in the nearest non-metazoan outgroup, choanoflagellates. We are using _Salpingoeca rosetta_ and _Monosiga brevicollis_, two model choanoflagellates with publicly available genomes."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "c4ad4dc2-a115-4679-8947-c6ed78582bbd",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current\n",
      "                                 Dload  Upload   Total   Spent    Left  Speed\n",
      "100 9376k    0 9376k    0     0   789k      0 --:--:--  0:00:11 --:--:--  321k\n",
      "  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current\n",
      "                                 Dload  Upload   Total   Spent    Left  Speed\n",
      "100 6562k    0 6562k    0     0  1575k      0 --:--:--  0:00:04 --:--:-- 1577k0  1460k      0 --:--:--  0:00:03 --:--:-- 1462k\n"
     ]
    }
   ],
   "source": [
    "# Download the two choanoflagellate proteomes from UniProt as FASTA files.\n",
    "# --fail makes curl exit non-zero on an HTTP error instead of saving the\n",
    "# server's error page into the .faa file as if it were sequence data.\n",
    "!curl --fail \"https://rest.uniprot.org/uniprotkb/stream?format=fasta&query=%28%28proteome%3AUP000007799%29%29\" -o salpingoeca.faa\n",
    "!curl --fail \"https://rest.uniprot.org/uniprotkb/stream?format=fasta&query=%28%28proteome%3AUP000001357%29%29\" -o monosiga.faa\n",
    "# Remove any leftover tmp/ directory; the mmseqs searches below use `tmp` as their working directory.\n",
    "!rm -rf tmp/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "aae09518-9f38-4c3e-90e0-f8595589e748",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "%%bash --out salpingoeca.out --err salpingoeca.err\n",
    "# Search the HGT candidate peptides against the Salpingoeca proteome.\n",
    "# Hits are written to salpingoeca.m8; `tmp` is the mmseqs working directory.\n",
    "hgt_candidates=\"/Users/npapadop/Documents/data/coffe/hgt.pep\"\n",
    "# The expansion is quoted so a path containing spaces or glob characters is passed as one argument.\n",
    "mmseqs easy-search \"${hgt_candidates}\" \"./salpingoeca.faa\" salpingoeca.m8 tmp --search-type 2 -s 7.0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "9f1aa456-1f6f-4523-b923-763b603b649c",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "%%bash --out monosiga.out --err monosiga.err\n",
    "# Search the HGT candidate peptides against the Monosiga proteome.\n",
    "# Hits are written to monosiga.m8; `tmp` is the mmseqs working directory.\n",
    "hgt_candidates=\"/Users/npapadop/Documents/data/coffe/hgt.pep\"\n",
    "# The expansion is quoted so a path containing spaces or glob characters is passed as one argument.\n",
    "mmseqs easy-search \"${hgt_candidates}\" \"./monosiga.faa\" monosiga.m8 tmp --search-type 2 -s 7.0"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "80982650-856c-4b18-be0e-501b240533ab",
   "metadata": {},
   "source": [
    "Let's have a look at the results:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "169716b0-3ce2-4eb7-8e53-915c99ec5122",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "      <th>6</th>\n",
       "      <th>7</th>\n",
       "      <th>8</th>\n",
       "      <th>9</th>\n",
       "      <th>10</th>\n",
       "      <th>11</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>c103983_g1_i1_m.71422,</td>\n",
       "      <td>A9V527</td>\n",
       "      <td>0.251</td>\n",
       "      <td>505</td>\n",
       "      <td>370</td>\n",
       "      <td>0</td>\n",
       "      <td>65</td>\n",
       "      <td>569</td>\n",
       "      <td>66</td>\n",
       "      <td>560</td>\n",
       "      <td>3.201000e-14</td>\n",
       "      <td>76</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>c97022_g1_i1_m.29482,</td>\n",
       "      <td>A9V324</td>\n",
       "      <td>0.302</td>\n",
       "      <td>135</td>\n",
       "      <td>91</td>\n",
       "      <td>0</td>\n",
       "      <td>93</td>\n",
       "      <td>224</td>\n",
       "      <td>114</td>\n",
       "      <td>248</td>\n",
       "      <td>9.888000e-05</td>\n",
       "      <td>44</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>c97022_g1_i1_m.29482,</td>\n",
       "      <td>A9V989</td>\n",
       "      <td>0.233</td>\n",
       "      <td>255</td>\n",
       "      <td>191</td>\n",
       "      <td>0</td>\n",
       "      <td>61</td>\n",
       "      <td>315</td>\n",
       "      <td>684</td>\n",
       "      <td>933</td>\n",
       "      <td>1.727000e-04</td>\n",
       "      <td>43</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                       0       1      2    3    4   5   6    7    8    9   \\\n",
       "0  c103983_g1_i1_m.71422,  A9V527  0.251  505  370   0  65  569   66  560   \n",
       "1   c97022_g1_i1_m.29482,  A9V324  0.302  135   91   0  93  224  114  248   \n",
       "2   c97022_g1_i1_m.29482,  A9V989  0.233  255  191   0  61  315  684  933   \n",
       "\n",
       "             10  11  \n",
       "0  3.201000e-14  76  \n",
       "1  9.888000e-05  44  \n",
       "2  1.727000e-04  43  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the Monosiga hit table: a tab-separated file with no header row,\n",
    "# so pandas numbers the columns 0..N-1.\n",
    "pd.read_table(\n",
    "    \"monosiga.m8\",\n",
    "    header=None,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "8c324422-faed-4249-89da-cb1dafeac4d1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "      <th>6</th>\n",
       "      <th>7</th>\n",
       "      <th>8</th>\n",
       "      <th>9</th>\n",
       "      <th>10</th>\n",
       "      <th>11</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>c103983_g1_i1_m.71422,</td>\n",
       "      <td>F2UGB0</td>\n",
       "      <td>0.253</td>\n",
       "      <td>490</td>\n",
       "      <td>354</td>\n",
       "      <td>0</td>\n",
       "      <td>63</td>\n",
       "      <td>552</td>\n",
       "      <td>159</td>\n",
       "      <td>633</td>\n",
       "      <td>2.262000e-16</td>\n",
       "      <td>83</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>c97022_g1_i1_m.29482,</td>\n",
       "      <td>F2TZ54</td>\n",
       "      <td>0.262</td>\n",
       "      <td>260</td>\n",
       "      <td>185</td>\n",
       "      <td>0</td>\n",
       "      <td>56</td>\n",
       "      <td>315</td>\n",
       "      <td>19</td>\n",
       "      <td>270</td>\n",
       "      <td>5.263000e-07</td>\n",
       "      <td>52</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>c97022_g1_i1_m.29482,</td>\n",
       "      <td>F2UQL9</td>\n",
       "      <td>0.319</td>\n",
       "      <td>135</td>\n",
       "      <td>89</td>\n",
       "      <td>0</td>\n",
       "      <td>93</td>\n",
       "      <td>224</td>\n",
       "      <td>111</td>\n",
       "      <td>245</td>\n",
       "      <td>8.588000e-06</td>\n",
       "      <td>48</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                       0       1      2    3    4   5   6    7    8    9   \\\n",
       "0  c103983_g1_i1_m.71422,  F2UGB0  0.253  490  354   0  63  552  159  633   \n",
       "1   c97022_g1_i1_m.29482,  F2TZ54  0.262  260  185   0  56  315   19  270   \n",
       "2   c97022_g1_i1_m.29482,  F2UQL9  0.319  135   89   0  93  224  111  245   \n",
       "\n",
       "             10  11  \n",
       "0  2.262000e-16  83  \n",
       "1  5.263000e-07  52  \n",
       "2  8.588000e-06  48  "
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the Salpingoeca hit table: a tab-separated file with no header row,\n",
    "# so pandas numbers the columns 0..N-1.\n",
    "pd.read_table(\n",
    "    \"salpingoeca.m8\",\n",
    "    header=None,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8184c03c-4741-4499-a346-a31f4e4ed61c",
   "metadata": {},
   "source": [
    "In both cases, the only relevant hit is c103983_g1, the gene that EggNOG v5.0 identifies as \"metal-dependent hydrolase - Proteobacteria\" and that MorF putatively identifies as an aminohydrolase."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}