001 /**
002 * Copyright (c) 2010, SIB. All rights reserved.
003 *
004 * SIB (Swiss Institute of Bioinformatics) - http://www.isb-sib.ch Host -
005 * https://sourceforge.net/projects/javaprotlib/
006 *
007 * Redistribution and use in source and binary forms, with or without
008 * modification, are permitted provided that the following conditions are met:
009 * Redistributions of source code must retain the above copyright notice, this
010 * list of conditions and the following disclaimer. Redistributions in binary
011 * form must reproduce the above copyright notice, this list of conditions and
012 * the following disclaimer in the documentation and/or other materials provided
013 * with the distribution. Neither the name of the SIB/GENEBIO nor the names of
014 * its contributors may be used to endorse or promote products derived from this
015 * software without specific prior written permission.
016 *
017 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
018 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
019 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
020 * ARE DISCLAIMED. IN NO EVENT SHALL SIB/GENEBIO BE LIABLE FOR ANY DIRECT,
021 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
022 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
023 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
024 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
025 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
026 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
027 */
028 package org.expasy.jpl.commons.collection.symbol.seq;
029
030
031 import java.util.regex.Matcher;
032 import java.util.regex.Pattern;
033 import java.util.regex.PatternSyntaxException;
034
035
036 /**
037 * This class allows to search a regular expression pattern all over a symbol
038 * sequence and store positions in its inner Iterator.
039 *
040 * @author nikitin
041 *
042 * @version 1.0
043 *
044 */
045 public class MotifMatcher<T> extends AbstractSequencePositioner<T> {
046
047 /** Motif searched on SymbolSequenceImpl object */
048 protected Pattern aminoAcidMotifPattern;
049
050 /** String representation of SymbolSequenceImpl */
051 protected String symbolSequence;
052
053 /**
054 * Inner iterator over pattern matching sequence positions.
055 *
056 */
057 public class Iterator extends AbstractSequencePositioner<T>.Iterator {
058
059 protected Matcher matcher;
060
061 public Iterator() {
062 assert (symbolSequence != null);
063 matcher = aminoAcidMotifPattern.matcher(symbolSequence);
064 }
065
066 @Override
067 public boolean foundNextPosition() {
068
069 if (matcher.find()) {
070 super.nextPosition = matcher.start();
071 return true;
072 }
073 return false;
074 }
075 }
076
077 /**
078 * Create an instance for a given pattern.
079 *
080 * @param strPattern the motif to search.
081 *
082 * @throws RuntimeException if regexp has a compilation error.
083 */
084 protected MotifMatcher(final String strPattern) {
085
086 try {
087 aminoAcidMotifPattern = Pattern.compile(strPattern);
088 } catch (final PatternSyntaxException e) {
089 throw new RuntimeException("Regular expression positioner error", e);
090 }
091 }
092
093 public static <T> MotifMatcher<T> newInstance(String pattern) {
094 return new MotifMatcher<T>(pattern);
095 }
096
097 /**
098 * Override setSequence() to convert byte sequence representation to String
099 * in order to use Java Regular Expression.
100 */
101 @Override
102 public void setSequence(final SymbolSequence<T> sequence) {
103 super.setSequence(sequence);
104
105 symbolSequence = sequence.toSymbolString();
106 }
107
108 /** @return the motif pattern */
109 public final Pattern getPattern() {
110 return aminoAcidMotifPattern;
111 }
112
113 @SuppressWarnings("unchecked")
114 public MotifMatcher.Iterator iterator() {
115 return new Iterator();
116 }
117
118 /**
119 * equal instances have equal patterns.
120 */
121 @SuppressWarnings("unchecked")
122 public boolean equals(Object o) {
123 if (o instanceof MotifMatcher) {
124 String pat = ((MotifMatcher) o).aminoAcidMotifPattern.pattern();
125 return (pat.equals(aminoAcidMotifPattern.pattern()));
126 }
127 return false;
128 }
129
130 public int hashCode() {
131 return aminoAcidMotifPattern.pattern().hashCode();
132 }
133 }