{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "19a33b01-d8d4-49de-9e05-302c14fb7c42", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2023-01-25 21:33\n" ] } ], "source": [ "from datetime import datetime, timezone\n", "import pandas as pd\n", "import pytz\n", "\n", "utc_dt = datetime.now(timezone.utc) # UTC time\n", "dt = utc_dt.astimezone()\n", "tz = pytz.timezone('Europe/Berlin')\n", "berlin_now = datetime.now(tz)\n", "print(f'{berlin_now:%Y-%m-%d %H:%M}')" ] }, { "cell_type": "markdown", "id": "55f808fc-0a34-48d5-96b9-659d27f16f13", "metadata": {}, "source": [ "The reviewers challenged us to look for the HGT candidates in the nearest non-metazoan outgroup, choanoflagellates. We are using _Salpingoeca rosetta_ and _Monosiga brevicollis_, two model choanoflagellates with publicly available genomes." ] }, { "cell_type": "code", "execution_count": 2, "id": "c4ad4dc2-a115-4679-8947-c6ed78582bbd", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " % Total % Received % Xferd Average Speed Time Time Time Current\n", " Dload Upload Total Spent Left Speed\n", "100 9376k 0 9376k 0 0 789k 0 --:--:-- 0:00:11 --:--:-- 321k\n", " % Total % Received % Xferd Average Speed Time Time Time Current\n", " Dload Upload Total Spent Left Speed\n", "100 6562k 0 6562k 0 0 1575k 0 --:--:-- 0:00:04 --:--:-- 1577k0 1460k 0 --:--:-- 0:00:03 --:--:-- 1462k\n" ] } ], "source": [ "!curl \"https://rest.uniprot.org/uniprotkb/stream?format=fasta&query=%28%28proteome%3AUP000007799%29%29\" -o salpingoeca.faa\n", "!curl \"https://rest.uniprot.org/uniprotkb/stream?format=fasta&query=%28%28proteome%3AUP000001357%29%29\" -o monosiga.faa\n", "!rm -rf tmp/" ] }, { "cell_type": "code", "execution_count": 3, "id": "aae09518-9f38-4c3e-90e0-f8595589e748", "metadata": { "tags": [] }, "outputs": [], "source": [ "%%bash --out salpingoeca.out --err salpingoeca.err\n", 
"hgt_candidates=\"/Users/npapadop/Documents/data/coffe/hgt.pep\"\n", "mmseqs easy-search ${hgt_candidates} \"./salpingoeca.faa\" salpingoeca.m8 tmp --search-type 2 -s 7.0" ] }, { "cell_type": "code", "execution_count": 4, "id": "9f1aa456-1f6f-4523-b923-763b603b649c", "metadata": { "tags": [] }, "outputs": [], "source": [ "%%bash --out monosiga.out --err monosiga.err\n", "hgt_candidates=\"/Users/npapadop/Documents/data/coffe/hgt.pep\"\n", "mmseqs easy-search ${hgt_candidates} \"./monosiga.faa\" monosiga.m8 tmp --search-type 2 -s 7.0" ] }, { "cell_type": "markdown", "id": "80982650-856c-4b18-be0e-501b240533ab", "metadata": {}, "source": [ "Let's have a look at the results:" ] }, { "cell_type": "code", "execution_count": 5, "id": "169716b0-3ce2-4eb7-8e53-915c99ec5122", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>0</th>\n", " <th>1</th>\n", " <th>2</th>\n", " <th>3</th>\n", " <th>4</th>\n", " <th>5</th>\n", " <th>6</th>\n", " <th>7</th>\n", " <th>8</th>\n", " <th>9</th>\n", " <th>10</th>\n", " <th>11</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>c103983_g1_i1_m.71422,</td>\n", " <td>A9V527</td>\n", " <td>0.251</td>\n", " <td>505</td>\n", " <td>370</td>\n", " <td>0</td>\n", " <td>65</td>\n", " <td>569</td>\n", " <td>66</td>\n", " <td>560</td>\n", " <td>3.201000e-14</td>\n", " <td>76</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>c97022_g1_i1_m.29482,</td>\n", " <td>A9V324</td>\n", " <td>0.302</td>\n", " <td>135</td>\n", " <td>91</td>\n", " <td>0</td>\n", " <td>93</td>\n", " <td>224</td>\n", " <td>114</td>\n", " <td>248</td>\n", " 
<td>9.888000e-05</td>\n", " <td>44</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>c97022_g1_i1_m.29482,</td>\n", " <td>A9V989</td>\n", " <td>0.233</td>\n", " <td>255</td>\n", " <td>191</td>\n", " <td>0</td>\n", " <td>61</td>\n", " <td>315</td>\n", " <td>684</td>\n", " <td>933</td>\n", " <td>1.727000e-04</td>\n", " <td>43</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " 0 1 2 3 4 5 6 7 8 9 \\\n", "0 c103983_g1_i1_m.71422, A9V527 0.251 505 370 0 65 569 66 560 \n", "1 c97022_g1_i1_m.29482, A9V324 0.302 135 91 0 93 224 114 248 \n", "2 c97022_g1_i1_m.29482, A9V989 0.233 255 191 0 61 315 684 933 \n", "\n", " 10 11 \n", "0 3.201000e-14 76 \n", "1 9.888000e-05 44 \n", "2 1.727000e-04 43 " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.read_csv(\"monosiga.m8\", sep=\"\\t\", header=None)" ] }, { "cell_type": "code", "execution_count": 6, "id": "8c324422-faed-4249-89da-cb1dafeac4d1", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>0</th>\n", " <th>1</th>\n", " <th>2</th>\n", " <th>3</th>\n", " <th>4</th>\n", " <th>5</th>\n", " <th>6</th>\n", " <th>7</th>\n", " <th>8</th>\n", " <th>9</th>\n", " <th>10</th>\n", " <th>11</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>c103983_g1_i1_m.71422,</td>\n", " <td>F2UGB0</td>\n", " <td>0.253</td>\n", " <td>490</td>\n", " <td>354</td>\n", " <td>0</td>\n", " <td>63</td>\n", " <td>552</td>\n", " <td>159</td>\n", " <td>633</td>\n", " <td>2.262000e-16</td>\n", " <td>83</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " 
<td>c97022_g1_i1_m.29482,</td>\n", " <td>F2TZ54</td>\n", " <td>0.262</td>\n", " <td>260</td>\n", " <td>185</td>\n", " <td>0</td>\n", " <td>56</td>\n", " <td>315</td>\n", " <td>19</td>\n", " <td>270</td>\n", " <td>5.263000e-07</td>\n", " <td>52</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>c97022_g1_i1_m.29482,</td>\n", " <td>F2UQL9</td>\n", " <td>0.319</td>\n", " <td>135</td>\n", " <td>89</td>\n", " <td>0</td>\n", " <td>93</td>\n", " <td>224</td>\n", " <td>111</td>\n", " <td>245</td>\n", " <td>8.588000e-06</td>\n", " <td>48</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " 0 1 2 3 4 5 6 7 8 9 \\\n", "0 c103983_g1_i1_m.71422, F2UGB0 0.253 490 354 0 63 552 159 633 \n", "1 c97022_g1_i1_m.29482, F2TZ54 0.262 260 185 0 56 315 19 270 \n", "2 c97022_g1_i1_m.29482, F2UQL9 0.319 135 89 0 93 224 111 245 \n", "\n", " 10 11 \n", "0 2.262000e-16 83 \n", "1 5.263000e-07 52 \n", "2 8.588000e-06 48 " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.read_csv(\"salpingoeca.m8\", sep=\"\\t\", header=None)" ] }, { "cell_type": "markdown", "id": "8184c03c-4741-4499-a346-a31f4e4ed61c", "metadata": {}, "source": [ "In both cases the only relevant hit that is found is c103983_g1, the gene EggNOG v5.0 identifies as \"metal-dependent hydrolase - Proteobacteria\" and MorF putatively identifies as an aminohydrolase." ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.8" } }, "nbformat": 4, "nbformat_minor": 5 }