% AutoSCOP journal article (Bioinformatics 23(10), 2007).
% NOTE(review): the DOI suffix `bti751` looks like it may belong to a different,
% earlier Bioinformatics article -- confirm against the publisher record before
% relying on it. The value is left unchanged here.
@article{gewehr_zimmer_07,
	author = {Gewehr, Jan Erik and Hintermair, Volker and Zimmer, Ralf},
	title = {{AutoSCOP}: automated prediction of {SCOP} classifications using unique pattern-class mappings},
	journal = {Bioinformatics},
	year = {2007},
	volume = {23},
	number = {10},
	pages = {1203--1210},
	doi = {10.1093/bioinformatics/bti751},
	keywords = {autoscop-07},
	abstract = {MOTIVATION: The sequence patterns contained in the available motif
		and hidden Markov model (HMM) databases are a valuable source of information for
		protein sequence annotation. For structure prediction and fold recognition
		purposes, we computed mappings from such pattern databases to the protein domain
		hierarchy given by the ASTRAL compendium and applied them to the prediction of
		SCOP classifications. Our aim is to make highly confident predictions also for
		non-trivial cases if possible and abstain from a prediction otherwise, and thus
		to provide a method that can be used as a first step in a pipeline of prediction
		methods. We describe two successful examples for such pipelines. With the
		AutoSCOP approach, it is possible to make predictions in a large-scale manner for
		many domains of the available sequences in the well-known protein sequence
		databases. RESULTS: AutoSCOP computes unique sequence patterns and pattern
		combinations for SCOP classifications. For instance, we assign a SCOP superfamily
		to a pattern found in its members whenever the pattern does not occur in any
		other SCOP superfamily. Especially on the fold and superfamily level, our method
		achieves both high sensitivity (above 93\%) and high specificity (above 98\%) on
		the difference set between two ASTRAL versions, due to being able to abstain from
		unreliable predictions. Further, on a harder test set filtered at low sequence
		identity, the combination with profile-profile alignments improves accuracy and
		performs comparably even to structure alignment methods. Integrating our method
		with structure alignment, we are able to achieve an accuracy of 99\% on SCOP fold
		classifications on this set. In an analysis of false assignments of domains from
		new folds/superfamilies/families to existing SCOP classifications, AutoSCOP
		correctly abstains for more than 70\% of the domains belonging to new folds and
		superfamilies, and more than 80\% of the domains belonging to new families. These
		findings show that our approach is a useful additional filter for SCOP
		classification prediction of protein domains in combination with well-known
		methods such as profile-profile alignment. AVAILABILITY: A web server where users
		can input their domain sequences is available at
		http://www.bio.ifi.lmu.de/autoscop.}
}