// RegularExpressionParser.java

/*
 *  Copyright (c) 2001-2024, Jean Tessier
 *  All rights reserved.
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *
 *      * Redistributions of source code must retain the above copyright
 *        notice, this list of conditions and the following disclaimer.
 *
 *      * Redistributions in binary form must reproduce the above copyright
 *        notice, this list of conditions and the following disclaimer in the
 *        documentation and/or other materials provided with the distribution.
 *
 *      * Neither the name of Jean Tessier nor the names of his contributors
 *        may be used to endorse or promote products derived from this software
 *        without specific prior written permission.
 *
 *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR
 *  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 *  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 *  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 *  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

package com.jeantessier.text;

import java.util.*;

import org.apache.logging.log4j.*;

/**
 * Splits a string into the Perl-style regular expressions it contains.
 *
 * <p>An expression starts either with {@code /} (which is then also its
 * separator) or with {@code m} followed by any single character that is used
 * as the separator, e.g. {@code m=foo=}.  It ends at the next occurrence of
 * the separator that is not immediately preceded by a backslash, and includes
 * any run of the modifier letters {@code g}, {@code i}, {@code m}, {@code o},
 * {@code s} and {@code x} that follows the closing separator.</p>
 */
public class RegularExpressionParser {
    private static final Logger logger = LogManager.getLogger(RegularExpressionParser.class);

    /** Letters accepted as modifiers right after the closing separator. */
    private static final String MODIFIERS = "gimosx";

    /**
     * Extracts every regular expression found in {@code re}, in order of
     * appearance.
     *
     * <p>Characters that are not part of an expression are ignored, and the
     * single character that follows each expression is skipped.  Note that
     * any {@code m} found outside of an expression is taken as the start of
     * an {@code m}-style expression, as long as it is not the last character.
     * An expression whose closing separator is missing extends to the end of
     * the input and is returned as-is.</p>
     *
     * @param re the text to scan
     * @return the expressions found, each including its separators and
     *         modifiers; empty if there are none
     * @throws NullPointerException if {@code re} is {@code null}
     */
    public static List<String> parseRE(String re) {
        List<String> result = new ArrayList<>();

        logger.debug("ParseRE \"{}\"", () -> re);

        int length = re.length();
        int start  = 0;
        int stop   = -1;

        while (start < length) {
            String separator = null;

            // Locate beginning & determine separator.  On exit, either an
            // opening was found (start < stop, stop being the first position
            // after the opening separator) or the input is exhausted
            // (start == length).
            while (start < length && stop < start) {
                char current = re.charAt(start);
                if (current == 'm' && (start + 1) < length) {
                    separator = re.substring(start + 1, start + 2);
                    stop = start + 2;
                } else if (current == '/') {
                    separator = "/";
                    stop = start + 1;
                } else {
                    start++;
                }
            }

            logger.debug("start is {}", start);
            logger.debug("separator is {}", separator);

            // Locate end.  This loop must run whenever an opening was found,
            // even when stop is already at the end of the input (opening
            // separator or escaped separator as the very last character).
            // Guarding it with "stop < length" as well would leave start
            // untouched in those cases and the outer loop would never end.
            // indexOf() returns -1 for a position at or past the end of the
            // input, which falls into the "unterminated" branch below.
            while (start < stop) {
                stop = re.indexOf(separator, stop);
                logger.debug("indexOf() is {}", stop);

                // NOTE(review): only the one character before the separator
                // is inspected, so a separator that follows an escaped
                // backslash ("\\") is also treated as escaped.
                if (stop != -1 && re.charAt(stop - 1) == '\\') {
                    // Escaped separator, keep looking after it
                    stop++;
                    continue;
                }

                if (stop == -1) {
                    // No closing separator, take everything that is left
                    stop = length;
                } else {
                    // Step over the closing separator and any modifiers
                    stop++;
                    while (stop < length && MODIFIERS.indexOf(re.charAt(stop)) != -1) {
                        stop++;
                    }
                }

                logger.debug("stop is {}", stop);

                // Add candidate
                String candidate = re.substring(start, stop);
                logger.debug("candidate is \"{}\"", candidate);
                result.add(candidate);

                // Move start past the character that follows the expression;
                // this also makes start > stop, which ends this loop.
                start = stop + 1;
            }
        }

        logger.debug("ParseRE \"{}\" results in {}", () -> re, () -> result);

        return result;
    }
}