001    /**
002     * Copyright (c) 2010, SIB. All rights reserved.
003     * 
004     * SIB (Swiss Institute of Bioinformatics) - http://www.isb-sib.ch Host -
005     * https://sourceforge.net/projects/javaprotlib/
006     * 
007     * Redistribution and use in source and binary forms, with or without
008     * modification, are permitted provided that the following conditions are met:
009     * Redistributions of source code must retain the above copyright notice, this
010     * list of conditions and the following disclaimer. Redistributions in binary
011     * form must reproduce the above copyright notice, this list of conditions and
012     * the following disclaimer in the documentation and/or other materials provided
013     * with the distribution. Neither the name of the SIB/GENEBIO nor the names of
014     * its contributors may be used to endorse or promote products derived from this
015     * software without specific prior written permission.
016     * 
017     * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
018     * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
019     * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
020     * ARE DISCLAIMED. IN NO EVENT SHALL SIB/GENEBIO BE LIABLE FOR ANY DIRECT,
021     * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
022     * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
023     * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
024     * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
025     * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
026     * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
027     */
028    package org.expasy.jpl.commons.collection.symbol.seq;
029    
030    
031    import java.util.regex.Matcher;
032    import java.util.regex.Pattern;
033    import java.util.regex.PatternSyntaxException;
034    
035    
036    /**
037     * This class allows to search a regular expression pattern all over a symbol
038     * sequence and store positions in its inner Iterator.
039     * 
040     * @author nikitin
041     * 
042     * @version 1.0
043     * 
044     */
045    public class MotifMatcher<T> extends AbstractSequencePositioner<T> {
046            
047            /** Motif searched on SymbolSequenceImpl object */
048            protected Pattern aminoAcidMotifPattern;
049            
050            /** String representation of SymbolSequenceImpl */
051            protected String symbolSequence;
052            
053            /**
054             * Inner iterator over pattern matching sequence positions.
055             * 
056             */
057            public class Iterator extends AbstractSequencePositioner<T>.Iterator {
058                    
059                    protected Matcher matcher;
060                    
061                    public Iterator() {
062                            assert (symbolSequence != null);
063                            matcher = aminoAcidMotifPattern.matcher(symbolSequence);
064                    }
065                    
066                    @Override
067                    public boolean foundNextPosition() {
068                            
069                            if (matcher.find()) {
070                                    super.nextPosition = matcher.start();
071                                    return true;
072                            }
073                            return false;
074                    }
075            }
076            
077            /**
078             * Create an instance for a given pattern.
079             * 
080             * @param strPattern the motif to search.
081             * 
082             * @throws RuntimeException if regexp has a compilation error.
083             */
084            protected MotifMatcher(final String strPattern) {
085                    
086                    try {
087                            aminoAcidMotifPattern = Pattern.compile(strPattern);
088                    } catch (final PatternSyntaxException e) {
089                            throw new RuntimeException("Regular expression positioner error", e);
090                    }
091            }
092            
093            public static <T> MotifMatcher<T> newInstance(String pattern) {
094                    return new MotifMatcher<T>(pattern);
095            }
096            
097            /**
098             * Override setSequence() to convert byte sequence representation to String
099             * in order to use Java Regular Expression.
100             */
101            @Override
102            public void setSequence(final SymbolSequence<T> sequence) {
103                    super.setSequence(sequence);
104                    
105                    symbolSequence = sequence.toSymbolString();
106            }
107            
108            /** @return the motif pattern */
109            public final Pattern getPattern() {
110                    return aminoAcidMotifPattern;
111            }
112            
113            @SuppressWarnings("unchecked")
114            public MotifMatcher.Iterator iterator() {
115                    return new Iterator();
116            }
117            
118            /**
119             * equal instances have equal patterns.
120             */
121            @SuppressWarnings("unchecked")
122            public boolean equals(Object o) {
123                    if (o instanceof MotifMatcher) {
124                            String pat = ((MotifMatcher) o).aminoAcidMotifPattern.pattern();
125                            return (pat.equals(aminoAcidMotifPattern.pattern()));
126                    }
127                    return false;
128            }
129            
130            public int hashCode() {
131                    return aminoAcidMotifPattern.pattern().hashCode();
132            }
133    }