refactor: redesign StringMatchFiniteAutomata (#5222)
* refactor
* add test
* fix clang
* fix pmd
* remove main method
* refactor searchPattern with private class
* fix checkstyle
* Update src/main/java/com/thealgorithms/others/StringMatchFiniteAutomata.java
  Co-authored-by: Piotr Idzik <65706193+vil02@users.noreply.github.com>
* Update src/main/java/com/thealgorithms/others/StringMatchFiniteAutomata.java
  Co-authored-by: Piotr Idzik <65706193+vil02@users.noreply.github.com>
* Update src/main/java/com/thealgorithms/others/StringMatchFiniteAutomata.java
  Co-authored-by: Piotr Idzik <65706193+vil02@users.noreply.github.com>
* fix clang
* tests: add more test cases

---------

Co-authored-by: Samuel Facchinello <samuel.facchinello@piksel.com>
Co-authored-by: Piotr Idzik <65706193+vil02@users.noreply.github.com>
This commit is contained in:
parent
c7ee0e73c2
commit
9973b8efc8
@ -1,80 +1,125 @@
|
|||||||
package com.thealgorithms.others;
|
package com.thealgorithms.others;
|
||||||
|
|
||||||
/**
|
import java.util.Set;
|
||||||
* @author Prateek Kumar Oraon (https://github.com/prateekKrOraon)
|
import java.util.TreeSet;
|
||||||
*/
|
|
||||||
import java.util.Scanner;
|
|
||||||
|
|
||||||
// An implementation of string matching using finite automata
|
/**
|
||||||
|
* A class to perform string matching using <a href="https://en.wikipedia.org/wiki/Finite-state_machine">finite automata</a>.
|
||||||
|
*
|
||||||
|
* @author <a href="https://github.com/prateekKrOraon">Prateek Kumar Oraon</a>
|
||||||
|
*/
|
||||||
public final class StringMatchFiniteAutomata {
|
public final class StringMatchFiniteAutomata {
|
||||||
|
|
||||||
|
// Constants
|
||||||
|
private static final int CHARS = Character.MAX_VALUE + 1; // Total number of characters in the input alphabet
|
||||||
|
|
||||||
|
// Private constructor to prevent instantiation
|
||||||
private StringMatchFiniteAutomata() {
|
private StringMatchFiniteAutomata() {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static final int CHARS = 256;
|
/**
|
||||||
public static int[][] fa;
|
* Searches for all occurrences of the pattern in the given text using a finite automaton.
|
||||||
public static Scanner scanner = null;
|
*
|
||||||
|
* @param text The text to search within.
|
||||||
|
* @param pattern The pattern to search for.
|
||||||
|
*/
|
||||||
|
public static Set<Integer> searchPattern(final String text, final String pattern) {
|
||||||
|
final var stateTransitionTable = computeStateTransitionTable(pattern);
|
||||||
|
FiniteAutomata finiteAutomata = new FiniteAutomata(stateTransitionTable);
|
||||||
|
|
||||||
public static void main(String[] args) {
|
Set<Integer> indexFound = new TreeSet<>();
|
||||||
scanner = new Scanner(System.in);
|
for (int i = 0; i < text.length(); i++) {
|
||||||
System.out.println("Enter String");
|
finiteAutomata.consume(text.charAt(i));
|
||||||
String text = scanner.nextLine();
|
|
||||||
System.out.println("Enter pattern");
|
|
||||||
String pat = scanner.nextLine();
|
|
||||||
|
|
||||||
searchPat(text, pat);
|
if (finiteAutomata.getState() == pattern.length()) {
|
||||||
|
indexFound.add(i - pattern.length() + 1);
|
||||||
scanner.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
public static void searchPat(String text, String pat) {
|
|
||||||
int m = pat.length();
|
|
||||||
int n = text.length();
|
|
||||||
|
|
||||||
fa = new int[m + 1][CHARS];
|
|
||||||
|
|
||||||
computeFA(pat, m, fa);
|
|
||||||
|
|
||||||
int state = 0;
|
|
||||||
for (int i = 0; i < n; i++) {
|
|
||||||
state = fa[state][text.charAt(i)];
|
|
||||||
|
|
||||||
if (state == m) {
|
|
||||||
System.out.println("Pattern found at index " + (i - m + 1));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return indexFound;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Computes finite automata for the pattern
|
/**
|
||||||
public static void computeFA(String pat, int m, int[][] fa) {
|
* Computes the state transition table of the finite automaton for the given pattern.
|
||||||
for (int state = 0; state <= m; ++state) {
|
*
|
||||||
|
* @param pattern The pattern to preprocess.
|
||||||
|
* @return The state transition table.
|
||||||
|
*/
|
||||||
|
private static int[][] computeStateTransitionTable(final String pattern) {
|
||||||
|
final int patternLength = pattern.length();
|
||||||
|
int[][] stateTransitionTable = new int[patternLength + 1][CHARS];
|
||||||
|
|
||||||
|
for (int state = 0; state <= patternLength; ++state) {
|
||||||
for (int x = 0; x < CHARS; ++x) {
|
for (int x = 0; x < CHARS; ++x) {
|
||||||
fa[state][x] = getNextState(pat, m, state, x);
|
stateTransitionTable[state][x] = getNextState(pattern, patternLength, state, x);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static int getNextState(String pat, int m, int state, int x) {
|
return stateTransitionTable;
|
||||||
// if current state is less than length of pattern
|
}
|
||||||
// and input character of pattern matches the character in the alphabet
|
|
||||||
// then automata goes to next state
|
/**
|
||||||
if (state < m && x == pat.charAt(state)) {
|
* Gets the next state of the finite automaton.
|
||||||
|
*
|
||||||
|
* @param pattern The pattern being matched.
|
||||||
|
* @param patternLength The length of the pattern.
|
||||||
|
* @param state The current state.
|
||||||
|
* @param x The current character from the input alphabet.
|
||||||
|
* @return The next state.
|
||||||
|
*/
|
||||||
|
private static int getNextState(final String pattern, final int patternLength, final int state, final int x) {
|
||||||
|
// If the current state is less than the length of the pattern
|
||||||
|
// and the character matches the pattern character, go to the next state
|
||||||
|
if (state < patternLength && x == pattern.charAt(state)) {
|
||||||
return state + 1;
|
return state + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check for the longest prefix which is also a suffix
|
||||||
for (int ns = state; ns > 0; ns--) {
|
for (int ns = state; ns > 0; ns--) {
|
||||||
if (pat.charAt(ns - 1) == x) {
|
if (pattern.charAt(ns - 1) == x) {
|
||||||
|
boolean match = true;
|
||||||
for (int i = 0; i < ns - 1; i++) {
|
for (int i = 0; i < ns - 1; i++) {
|
||||||
if (pat.charAt(i) != pat.charAt(state - ns + i + 1)) {
|
if (pattern.charAt(i) != pattern.charAt(state - ns + i + 1)) {
|
||||||
|
match = false;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
if (i == ns - 1) {
|
if (match) {
|
||||||
return ns;
|
return ns;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
|
// If no prefix which is also a suffix is found, return 0
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A class representing the finite automaton for pattern matching.
|
||||||
|
*/
|
||||||
|
private static final class FiniteAutomata {
|
||||||
|
private int state = 0;
|
||||||
|
private final int[][] stateTransitionTable;
|
||||||
|
|
||||||
|
private FiniteAutomata(int[][] stateTransitionTable) {
|
||||||
|
this.stateTransitionTable = stateTransitionTable;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Consumes an input character and transitions to the next state.
|
||||||
|
*
|
||||||
|
* @param input The input character.
|
||||||
|
*/
|
||||||
|
private void consume(final char input) {
|
||||||
|
state = stateTransitionTable[state][input];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the current state of the finite automaton.
|
||||||
|
*
|
||||||
|
* @return The current state.
|
||||||
|
*/
|
||||||
|
private int getState() {
|
||||||
|
return state;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,23 @@
|
|||||||
|
package com.thealgorithms.others;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.stream.Stream;
|
||||||
|
import org.junit.jupiter.params.ParameterizedTest;
|
||||||
|
import org.junit.jupiter.params.provider.Arguments;
|
||||||
|
import org.junit.jupiter.params.provider.MethodSource;
|
||||||
|
|
||||||
|
class StringMatchFiniteAutomataTest {
|
||||||
|
|
||||||
|
@ParameterizedTest
|
||||||
|
@MethodSource("provideTestCases")
|
||||||
|
void searchPattern(String text, String pattern, Set<Integer> expectedOutput) {
|
||||||
|
assertEquals(expectedOutput, StringMatchFiniteAutomata.searchPattern(text, pattern));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Stream<Arguments> provideTestCases() {
|
||||||
|
return Stream.of(Arguments.of("abcbcabc", "abc", Set.of(0, 5)), Arguments.of("", "abc", Set.of()), Arguments.of("", "", Set.of()), Arguments.of("a", "b", Set.of()), Arguments.of("a", "a", Set.of(0)), Arguments.of("abcdabcabcabcd", "abcd", Set.of(0, 10)), Arguments.of("abc", "bcd", Set.of()),
|
||||||
|
Arguments.of("abcdefg", "xyz", Set.of()), Arguments.of("abcde", "", Set.of(1, 2, 3, 4, 5)), Arguments.of("abcabcabc", "abc", Set.of(0, 3, 6)), Arguments.of("abcabcabc", "abcabcabc", Set.of(0)), Arguments.of("aaabbbaaa", "aaa", Set.of(0, 6)), Arguments.of("abcdefg", "efg", Set.of(4)));
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user