2024
Wijesekara, Yasas; Wu, Ling-Yi; Beeloo, Rick; Rozwalak, Piotr; Hauptfeld, Ernestina; Doijad, Swapnil P.; Dutilh, Bas E.; Kaderali, Lars
Jaeger: an accurate and fast deep-learning tool to detect bacteriophage sequences Journal Article
In: bioRxiv, 2024.
Abstract | BibTeX | Tags: Project 14, WP 1.1 Virus identification, WP 1.2 Host prediction
% Preprint record (journal field: bioRxiv) for the Jaeger bacteriophage-detection tool.
% The keywords field carries the project / work-package tags shown in the listing.
@article{Wijesekara2024,
  author    = {Yasas Wijesekara and Ling-Yi Wu and Rick Beeloo and Piotr Rozwalak and Ernestina Hauptfeld and Swapnil P. Doijad and Bas E. Dutilh and Lars Kaderali},
  title     = {Jaeger: an accurate and fast deep-learning tool to detect bacteriophage sequences},
  journal   = {bioRxiv},
  year      = {2024},
  date      = {2024-09-24},
  abstract  = {Viruses are integral to every biome on Earth, yet we still need a more comprehensive picture of their identity and global distribution. Global metagenomics sequencing efforts revealed the genomic content of tens of thousands of environmental samples, however identifying the viral sequences in these datasets remains challenging due to their vast genomic diversity. Here, we address identifying bacteriophage sequences in unlabeled sequencing data. In a recent benchmarking paper, we observed that existing deep-learning tools show a high true positive rate, but may also produce many false positives when confronted with divergent sequences. To tackle this challenge, we introduce Jaeger, a novel deep-learning method designed specifically for identifying bacteriophage genome fragments. Extensive benchmarking on the IMG/VR database and real-world metagenomes reveals Jaeger’s consistent high sensitivity (0.87) and precision (0.92). Applying Jaeger to over 16,000 metagenomic assemblies from the MGnify database yielded over five million putative phage contigs. On average, Jaeger is around 20 times faster than the other state-of-the-art methods. Jaeger is available at https://github.com/MGXlab/Jaeger.},
  keywords  = {Project 14, WP 1.1 Virus identification, WP 1.2 Host prediction},
  pubstate  = {published},
  tppubtype = {article}
}
Peng, Xue; Smith, Sophie Elizabeth; Huang, Wanqi; Ru, Jinlong; Mirzaei, Mohammadali Khan; Deng, Li
Metagenomic analyses of single phages and phage cocktails show instances of contamination with temperate phages and bacterial DNA Journal Article
In: bioRxiv, 2024.
Abstract | Links | BibTeX | Tags: Project 05, WP 1.1 Virus identification, WP 1.2 Host prediction
% Preprint record (journal field: bioRxiv) on contamination found in phage cocktails.
% The keywords field carries the project / work-package tags shown in the listing.
@article{Peng2024,
  author    = {Xue Peng and Sophie Elizabeth Smith and Wanqi Huang and Jinlong Ru and Mohammadali Khan Mirzaei and Li Deng},
  title     = {Metagenomic analyses of single phages and phage cocktails show instances of contamination with temperate phages and bacterial DNA},
  journal   = {bioRxiv},
  year      = {2024},
  date      = {2024-09-12},
  doi       = {10.1101/2024.09.12.612727},
  abstract  = {Increasing antibiotic resistance has led to renewed attention being paid to bacteriophage therapy. Commercial phage cocktails are available but often their contents of the phages are not well defined. Some metagenomic studies have been done to retrospectively characterise these cocktails, but little is known about the replication cycle of the included phages, or about the possible bacterial DNA contamination. In this study, published metagenomic sequences were reanalysed using recent advances in viromics tools. Signs of temperate phage contigs were found in all cocktail metagenomes, as well as host DNA, which could poses a risk as it may lead to horizontal gene transfer of virulence factors to commensals and pathogens. This suggests the need to implement further quality measures before using phage cocktails therapeutically.},
  keywords  = {Project 05, WP 1.1 Virus identification, WP 1.2 Host prediction},
  pubstate  = {published},
  tppubtype = {article}
}
Deboutte, Ward; De Smet, Lina; Brunain, Marleen; Basler, Nikolas; De Rycke, Riet; Smets, Lena; de Graaf, Dirk C.; Matthijnssens, Jelle
Known and novel viruses in Belgian honey bees: yearly differences, spatial clustering, and associations with overwintering loss Journal Article
In: Microbiol Spectrum, vol. 12, iss. 7, pp. e0358123, 2024.
Abstract | Links | BibTeX | Tags: Project 11, WP 1.1 Virus identification, WP 1.2 Host prediction
% Journal article on viruses in Belgian honey bees (volume 12, number 7).
% The compound surnames "De Smet" and "De Rycke" are braced so they are not split
% into given name "Lina De" / surname "Smet"; the numeric issue is stored in number.
@article{Deboutte2024,
  author    = {Ward Deboutte and Lina {De Smet} and Marleen Brunain and Nikolas Basler and Riet {De Rycke} and Lena Smets and Dirk C. de Graaf and Jelle Matthijnssens},
  title     = {Known and novel viruses in {Belgian} honey bees: yearly differences, spatial clustering, and associations with overwintering loss},
  journal   = {Microbiology Spectrum},
  volume    = {12},
  number    = {7},
  pages     = {e0358123},
  year      = {2024},
  date      = {2024-07-02},
  doi       = {10.1128/spectrum.03581-23},
  abstract  = {In recent years, managed honey bee colonies have been suffering from an increasing number of biotic and abiotic stressors, resulting in numerous losses of colonies worldwide. A pan-European study, EPILOBEE, estimated the colony loss in Belgium to be 32.4% in 2012 and 14.8% in 2013. In the current study, absolute viral loads of four known honey bee viruses (DWV-A, DWV-B, AmFV, and BMLV) and three novel putative honey bee viruses (Apis orthomyxovirus 1, apthili virus, and apparli virus) were determined in 300 Flemish honey bee samples, and associations with winter survival were determined. This revealed that, in addition to the known influence of DWV-A and DWV-B on colony health, one of the newly described viruses (apthili virus) shows a strong yearly difference and is also associated with winter survival. Furthermore, all scrutinized viruses revealed significant spatial clustering patterns, implying that despite the limited surface area of Flanders, local virus transmission is paramount. The vast majority of samples were positive for at least one of the seven investigated viruses, and up to 20% of samples were positive for at least one of the three novel viruses. One of those three, Apis orthomyxovirus 1, was shown to be a genuine honey bee-infecting virus, able to infect all developmental stages of the honey bee, as well as the Varroa destructor mite. These results shed light on the most prevalent viruses in Belgium and their roles in the winter survival of honey bee colonies.
Importance: The western honey bee (Apis mellifera) is a highly effective pollinator of flowering plants, including many crops, which gives honey bees an outstanding importance both ecologically and economically. Alarmingly high annual loss rates of managed honey bee colonies are a growing concern for beekeepers and scientists and have prompted a significant research effort toward bee health. Several detrimental factors have been identified, such as varroa mite infestation and disease from various bacterial and viral agents, but annual differences are often not elucidated. In this study, we utilize the viral metagenomic survey of the EPILOBEE project, a European research program for bee health, to elaborate on the most abundant bee viruses of Flanders. We complement the existing metagenomic data with absolute viral loads and their spatial and temporal distributions. Furthermore, we identify Apis orthomyxovirus 1 as a potentially emerging pathogen, as we find evidence for its active replication honey bees.},
  keywords  = {Project 11, WP 1.1 Virus identification, WP 1.2 Host prediction},
  pubstate  = {published},
  tppubtype = {article}
}
Importance: The western honey bee (Apis mellifera) is a highly effective pollinator of flowering plants, including many crops, which gives honey bees an outstanding importance both ecologically and economically. Alarmingly high annual loss rates of managed honey bee colonies are a growing concern for beekeepers and scientists and have prompted a significant research effort toward bee health. Several detrimental factors have been identified, such as varroa mite infestation and disease from various bacterial and viral agents, but annual differences are often not elucidated. In this study, we utilize the viral metagenomic survey of the EPILOBEE project, a European research program for bee health, to elaborate on the most abundant bee viruses of Flanders. We complement the existing metagenomic data with absolute viral loads and their spatial and temporal distributions. Furthermore, we identify Apis orthomyxovirus 1 as a potentially emerging pathogen, as we find evidence for its active replication honey bees.
2023
Luo, Shiqi; Ru, Jinlong; Mirzaei, Mohammadali Khan; Xue, Jinling; Peng, Xue; Ralser, Anna; Hadi, Joshua Lemuel; Mejías-Luque, Raquel; Gerhard, Markus; Deng, Li
Helicobacter pylori infection alters gut virome by expanding temperate phages linked to increased risk of colorectal cancer Journal Article
In: Gut, pp. gutjnl-2023-330362, 2023.
Abstract | Links | BibTeX | Tags: Project 05, WP 1.2 Host prediction
% Journal article in Gut (2023). This entry previously used the non-unique placeholder
% key; it now follows the FirstauthorYear scheme, and the ids field keeps the old key
% as a biblatex alias (classic BibTeX ignores that field).
@article{Luo2023a,
  ids       = {nokey},
  author    = {Shiqi Luo and Jinlong Ru and Mohammadali Khan Mirzaei and Jinling Xue and Xue Peng and Anna Ralser and Joshua Lemuel Hadi and Raquel Mejías-Luque and Markus Gerhard and Li Deng},
  title     = {Helicobacter pylori infection alters gut virome by expanding temperate phages linked to increased risk of colorectal cancer},
  journal   = {Gut},
  pages     = {gutjnl-2023-330362},
  year      = {2023},
  date      = {2023-11-02},
  doi       = {10.1136/gutjnl-2023-330362},
  abstract  = {No abstract available.},
  keywords  = {Project 05, WP 1.2 Host prediction},
  pubstate  = {published},
  tppubtype = {article}
}
Luo, Shiqi; Ru, Jinlong; Mirzaei, Mohammadali Khan; Xue, Jinling; Peng, Xue; Ralser, Anna; Mejías-Luque, Raquel; Gerhard, Markus; Deng, Li
Gut virome profiling identifies an association between temperate phages and colorectal cancer promoted by Helicobacter pylori infection Journal Article
In: Gut Microbes, vol. 15, iss. 2, pp. 2257291, 2023.
Abstract | Links | BibTeX | Tags: Project 05, WP 1.1 Virus identification, WP 1.2 Host prediction
% Journal article in Gut Microbes (volume 15, number 2). The key replaces a non-unique
% placeholder. The surname Mejías-Luque is written hyphenated so it stays one surname
% instead of being split into given name "Raquel Mejías" / surname "Luque".
@article{Luo2023b,
  author    = {Shiqi Luo and Jinlong Ru and Mohammadali Khan Mirzaei and Jinling Xue and Xue Peng and Anna Ralser and Raquel Mejías-Luque and Markus Gerhard and Li Deng},
  title     = {Gut virome profiling identifies an association between temperate phages and colorectal cancer promoted by {Helicobacter} pylori infection},
  journal   = {Gut Microbes},
  volume    = {15},
  number    = {2},
  pages     = {2257291},
  year      = {2023},
  date      = {2023-09-25},
  doi       = {10.1080/19490976.2023.2257291},
  abstract  = {Colorectal cancer (CRC) is one of the most commonly diagnosed cancers worldwide. While a close correlation between chronic Helicobacter pylori infection and CRC has been reported, the role of the virome has been overlooked. Here, we infected Apc-mutant mouse models and C57BL/6 mice with H. pylori and conducted a comprehensive metagenomics analysis of H. pylori-induced changes in lower gastrointestinal tract bacterial and viral communities. We observed an expansion of temperate phages in H. pylori infected Apc+/1638N mice at the early stage of carcinogenesis. Some of the temperate phages were predicted to infect bacteria associated with CRC, including Enterococcus faecalis. We also observed a high prevalence of virulent genes, such as flgJ, cwlJ, and sleB, encoded by temperate phages. In addition, we identified phages associated with pre-onset and onset of H. pylori-promoted carcinogenesis. Through co-occurrence network analysis, we found strong associations between the viral and bacterial communities in infected mice before the onset of carcinogenesis. These findings suggest that the expansion of temperate phages, possibly caused by prophage induction triggered by H. pylori infection, may have contributed to the development of CRC in mice by interacting with the bacterial community.},
  keywords  = {Project 05, WP 1.1 Virus identification, WP 1.2 Host prediction},
  pubstate  = {published},
  tppubtype = {article}
}
Wu, Ling-Yi; Pappas, Nikolaos; Wijesekara, Yasas; Piedade, Gonçalo J.; Brussaard, Corina P. D.; Dutilh, Bas E.
Benchmarking Bioinformatic Virus Identification Tools Using Real-World Metagenomic Data across Biomes Journal Article
In: bioRxiv, 2023.
Abstract | Links | BibTeX | Tags: Project 12, Project 13, WP 1.1 Virus identification, WP 1.2 Host prediction
% Preprint record (journal field: bioRxiv) benchmarking virus identification tools.
% The key replaces a non-unique placeholder; the initials "P. D." are space-separated
% so each one is treated as its own name token.
@article{Wu2023,
  author    = {Ling-Yi Wu and Nikolaos Pappas and Yasas Wijesekara and Gonçalo J. Piedade and Corina P. D. Brussaard and Bas E. Dutilh},
  title     = {Benchmarking Bioinformatic Virus Identification Tools Using Real-World Metagenomic Data across Biomes},
  journal   = {bioRxiv},
  year      = {2023},
  date      = {2023-04-28},
  doi       = {10.1101/2023.04.26.538077},
  abstract  = {As most viruses remain uncultivated, metagenomics is currently the main method for virus discovery. Detecting viruses in metagenomic data is not trivial. In the past few years, many bioinformatic virus identification tools have been developed for this task, making it challenging to choose the right tools, parameters, and cutoffs. As all these tools measure different biological signals, and use different algorithms and training/reference databases, it is imperative to conduct an independent benchmarking to give users objective guidance. We compared the performance of ten state-of-the-art virus identification tools in thirteen modes on eight paired viral and microbial datasets from three distinct biomes, including a new complex dataset from Antarctic coastal waters. The tools had highly variable true positive rates (0 – 68%) and false positive rates (0 – 15%). PPR-Meta best distinguished viral from microbial contigs, followed by DeepVirFinder, VirSorter2, and VIBRANT. Different tools identified different subsets of the benchmarking data and all tools, except for Sourmash, found unique viral contigs. Tools performance could be improved with adjusted parameter cutoffs, indicating that adjustment of parameter cutoffs before usage should be considered. Together, our independent benchmarking provides guidance on choices of bioinformatic virus identification tools and gives suggestions for parameter adjustments for viromics researchers.},
  keywords  = {Project 12, Project 13, WP 1.1 Virus identification, WP 1.2 Host prediction},
  pubstate  = {published},
  tppubtype = {article}
}
Liu, Dan; Young, Francesca; Robertson, David L.; Yuan, Ke
Prediction of virus-host association using protein language models and multiple instance learning Journal Article
In: bioRxiv, 2023.
Abstract | Links | BibTeX | Tags: Project 10, WP 1.2 Host prediction, WP 1.3 Virus-host interactions
% Preprint record (journal field: bioRxiv) introducing the EvoMIL virus-host predictor.
% The key replaces a non-unique placeholder and follows the FirstauthorYear scheme.
@article{Liu2023,
  author    = {Dan Liu and Francesca Young and David L. Robertson and Ke Yuan},
  title     = {Prediction of virus-host association using protein language models and multiple instance learning},
  journal   = {bioRxiv},
  year      = {2023},
  date      = {2023-04-08},
  urldate   = {2023-04-08},
  doi       = {10.1101/2023.04.07.536023},
  abstract  = {Predicting virus-host association is essential to understand how viruses interact with host species, and discovering new therapeutics for viral diseases across humans and animals. Currently, the host of the majority of viruses is unknown. Here, we introduce EvoMIL, a deep learning method that predicts virus-host association at the species level from viral sequence only. The method combines a pre-trained large protein language model and attention-based multiple instance learning to allow protein-orientated predictions. Our results show that protein embeddings capture stronger predictive signals than traditional handcrafted features, including amino acids and DNA k-mers, and physio-chemical properties. EvoMIL binary classifiers achieve AUC values of over 0.95 for all prokaryotic and nearly 0.8 for almost all eukaryotic hosts. In multi-host prediction tasks, EvoMIL achieved median performance improvements of 8.6% in prokaryotic hosts and 1.8% in eukaryotic hosts. Furthermore, EvoMIL estimates the importance of single proteins in the prediction and maps them to an embedding landscape of all viral proteins, where proteins with similar functions are distinctly clustered together.},
  keywords  = {Project 10, WP 1.2 Host prediction, WP 1.3 Virus-host interactions},
  pubstate  = {published},
  tppubtype = {article}
}
Ru, Jinlong; Mirzaei, Mohammadali Khan; Xue, Jinling; Peng, Xue; Deng, Li
ViroProfiler: a containerized bioinformatics pipeline for viral metagenomic data analysis Journal Article
In: Gut Microbes, vol. 15, iss. 1, pp. 2192522, 2023.
Abstract | Links | BibTeX | Tags: Project 05, WP 1.1 Virus identification, WP 1.2 Host prediction
% Journal article in Gut Microbes (volume 15, number 1) presenting ViroProfiler.
% The key replaces a non-unique placeholder; the camel-case tool name is braced so
% sentence-casing styles keep its capitalisation.
@article{Ru2023,
  author    = {Jinlong Ru and Mohammadali Khan Mirzaei and Jinling Xue and Xue Peng and Li Deng},
  title     = {{ViroProfiler}: a containerized bioinformatics pipeline for viral metagenomic data analysis},
  journal   = {Gut Microbes},
  volume    = {15},
  number    = {1},
  pages     = {2192522},
  year      = {2023},
  date      = {2023-03-30},
  doi       = {10.1080/19490976.2023.2192522},
  abstract  = {Bacteriophages play central roles in the maintenance and function of most ecosystems by regulating bacterial communities. Yet, our understanding of their diversity remains limited due to the lack of robust bioinformatics standards. Here we present ViroProfiler, an in-silico workflow for analyzing shotgun viral metagenomic data. ViroProfiler can be executed on a local Linux computer or cloud computing environments. It uses the containerization technique to ensure computational reproducibility and facilitate collaborative research. ViroProfiler is freely available at https://github.com/deng-lab/viroprofiler.},
  keywords  = {Project 05, WP 1.1 Virus identification, WP 1.2 Host prediction},
  pubstate  = {published},
  tppubtype = {article}
}
2022
Peng, Xue; Ru, Jinlong; Mirzaei, Mohammadali Khan; Deng, Li
Replidec - Use naive Bayes classifier to identify virus lifecycle from metagenomics data Journal Article
In: bioRxiv, 2022.
Abstract | Links | BibTeX | Tags: Project 05, WP 1.2 Host prediction
% Preprint record (journal field: bioRxiv) describing the Replidec classifier.
% The key replaces a non-unique placeholder; the proper noun Bayes is braced so
% sentence-casing styles keep its capital.
@article{Peng2022,
  author    = {Xue Peng and Jinlong Ru and Mohammadali Khan Mirzaei and Li Deng},
  title     = {Replidec - Use naive {Bayes} classifier to identify virus lifecycle from metagenomics data},
  journal   = {bioRxiv},
  year      = {2022},
  date      = {2022-07-19},
  doi       = {10.1101/2022.07.18.500415},
  abstract  = {Motivation: Viruses are the most abundant biological entities on earth. The majority of these entities are bacterial viruses or phages which specifically infect bacteria. Phages can use different replication strategies to invade their hosts including lytic, lysogenic, chronic cycle and pseudolysogeny. While the determination of the replication strategy used by phages is important to explore the phage-bacteria relationships in different ecosystems there are not many tools that can predict this in metagenomic data. In addition, most of the tools available can only predict lytic and lysogenic cycles. To address this issue, we have developed a new software called Replidec to identify three most common phage replication cycles (virulent, temperate, chronic) in viral sequences.
Results: Replidec uses Naive Bayes classifier combined with alignment-based methods to improve the prediction accuracy in metagenomic data. We test Replidec on viral genomes with known replication cycle and simulated metagenomic sequences. Replidec perform relatively good both in isolated genomes (F1 score: 92.29% ± 0.81; mcc: 89.14% ± 1.22) and simulated metagenomic sequences(F1 score: 87.55% ± 2.12; mcc: 88.23% ± 2.55). Moreover, Replidec can also accurately predict the replication cycle in small viral fragments(∼3000bp). In conclusion, Replidec can achieve the best performance in simulated metagenomic data compared to most prediction softwares including BACPHLIP.},
  keywords  = {Project 05, WP 1.2 Host prediction},
  pubstate  = {published},
  tppubtype = {article}
}
Results: Replidec uses Naive Bayes classifier combined with alignment-based methods to improve the prediction accuracy in metagenomic data. We test Replidec on viral genomes with known replication cycle and simulated metagenomic sequences. Replidec perform relatively good both in isolated genomes (F1 score: 92.29% ± 0.81; mcc: 89.14% ± 1.22) and simulated metagenomic sequences(F1 score: 87.55% ± 2.12; mcc: 88.23% ± 2.55). Moreover, Replidec can also accurately predict the replication cycle in small viral fragments(∼3000bp). In conclusion, Replidec can achieve the best performance in simulated metagenomic data compared to most prediction softwares including BACPHLIP.
2021
Goettsch, Winfried; Beerenwinkel, Niko; Deng, Li; Dölken, Lars; Dutilh, Bas E.; Erhard, Florian; Kaderali, Lars; von Kleist, Max; Marquet, Roland; Matthijnssens, Jelle; McCallin, Shawna; McMahon, Dino; Rattei, Thomas; van Rij, Ronald P.; Robertson, David L.; Schwemmle, Martin; Stern-Ginossar, Noam; Marz, Manja
ITN -- VIROINF: Understanding (Harmful) Virus-Host Interactions by Linking Virology and Bioinformatics Journal Article
In: Viruses, vol. 13, no. 5, pp. 766, 2021.
Abstract | Links | BibTeX | Tags: Project 01, Project 02, Project 03, Project 04, Project 05, Project 06, Project 07, Project 08, Project 09, Project 10, Project 11, Project 12, Project 13, Project 14, Project 15, WP 1.1 Virus identification, WP 1.2 Host prediction, WP 1.3 Virus-host interactions, WP 1.4 Virus regulation, WP 1.5 Virus products, WP 2.1 Microevolution: Virus quasispecies, WP 2.2 Macroevolution: Natural selection of viruses
% Journal article in Viruses (volume 13, number 5) describing the VIROINF network.
% The key replaces a non-unique placeholder; the acronyms ITN and VIROINF are braced
% so sentence-casing styles do not lowercase them.
@article{Goettsch2021,
  author    = {Winfried Goettsch and Niko Beerenwinkel and Li Deng and Lars Dölken and Bas E. Dutilh and Florian Erhard and Lars Kaderali and Max von Kleist and Roland Marquet and Jelle Matthijnssens and Shawna McCallin and Dino McMahon and Thomas Rattei and Ronald P. {van Rij} and David L. Robertson and Martin Schwemmle and Noam Stern-Ginossar and Manja Marz},
  title     = {{ITN} -- {VIROINF}: Understanding (Harmful) Virus-Host Interactions by Linking Virology and Bioinformatics},
  journal   = {Viruses},
  volume    = {13},
  number    = {5},
  pages     = {766},
  year      = {2021},
  date      = {2021-04-27},
  urldate   = {2021-04-27},
  doi       = {10.3390/v13050766},
  abstract  = {Many recent studies highlight the fundamental importance of viruses. Besides their important role as human and animal pathogens, their beneficial, commensal or harmful functions are poorly understood. By developing and applying tailored bioinformatical tools in important virological models, the Marie Skłodowska-Curie Initiative International Training Network VIROINF will provide a better understanding of viruses and the interaction with their hosts. This will open the door to validate methods of improving viral growth, morphogenesis and development, as well as to control strategies against unwanted microorganisms. The key feature of VIROINF is its interdisciplinary nature, which brings together virologists and bioinformaticians to achieve common goals.},
  keywords  = {Project 01, Project 02, Project 03, Project 04, Project 05, Project 06, Project 07, Project 08, Project 09, Project 10, Project 11, Project 12, Project 13, Project 14, Project 15, WP 1.1 Virus identification, WP 1.2 Host prediction, WP 1.3 Virus-host interactions, WP 1.4 Virus regulation, WP 1.5 Virus products, WP 2.1 Microevolution: Virus quasispecies, WP 2.2 Macroevolution: Natural selection of viruses},
  pubstate  = {published},
  tppubtype = {article}
}