Aho–Corasick Algorithm

Aho–Corasick algorithm is a string searching algorithm. It is a kind of dictionary-matching algorithm that locates elements of a finite set of strings (the "dictionary") within an input text. It matches all strings simultaneously.



									Private Const MaxStates As Integer = 6 * 50 + 10
Private Const MaxChars As Integer = 26

Private Shared Out As Integer() = New Integer(MaxStates - 1) {}
Private Shared FF As Integer() = New Integer(MaxStates - 1) {}
Private Shared GF As Integer(,) = New Integer(MaxStates - 1, MaxChars - 1) {}

Private Shared Function BuildMatchingMachine(words As String(), Optional lowestChar As Char = "a"c, Optional highestChar As Char = "z"c) As Integer
	Out = Enumerable.Repeat(0, Out.Length).ToArray()
	FF = Enumerable.Repeat(-1, FF.Length).ToArray()

	For i As Integer = 0 To MaxStates - 1
		For j As Integer = 0 To MaxChars - 1
			GF(i, j) = -1
		Next
	Next

	Dim states As Integer = 1

	For i As Integer = 0 To words.Length - 1
		Dim keyword As String = words(i)
		Dim currentState As Integer = 0

		For j As Integer = 0 To keyword.Length - 1
			Dim c As Integer = Convert.ToInt32(keyword(j)) - Convert.ToInt32(lowestChar)

			If GF(currentState, c) = -1 Then
				GF(currentState, c) = System.Math.Max(System.Threading.Interlocked.Increment(states), states - 1)
			End If

			currentState = GF(currentState, c)
		Next

		Out(currentState) = Out(currentState) Or (1 << i)
	Next

	For c As Integer = 0 To MaxChars - 1
		If GF(0, c) = -1 Then
			GF(0, c) = 0
		End If
	Next

	Dim q As New List(Of Integer)()
	For c As Integer = 0 To Convert.ToInt32(highestChar) - Convert.ToInt32(lowestChar)
		If GF(0, c) <> -1 AndAlso GF(0, c) <> 0 Then
			FF(GF(0, c)) = 0
			q.Add(GF(0, c))
		End If
	Next

	While Convert.ToBoolean(q.Count)
		Dim state As Integer = q(0)
		q.RemoveAt(0)

		For c As Integer = 0 To Convert.ToInt32(highestChar) - Convert.ToInt32(lowestChar)
			If GF(state, c) <> -1 Then
				Dim failure As Integer = FF(state)

				While GF(failure, c) = -1
					failure = FF(failure)
				End While

				failure = GF(failure, c)
				FF(GF(state, c)) = failure
				Out(GF(state, c)) = Out(GF(state, c)) Or Out(failure)
				q.Add(GF(state, c))
			End If
		Next
	End While

	Return states
End Function

Private Shared Function FindNextState(currentState As Integer, nextInput As Char, Optional lowestChar As Char = "a"c) As Integer
	Dim answer As Integer = currentState
	Dim c As Integer = Convert.ToInt32(nextInput) - Convert.ToInt32(lowestChar)

	While GF(answer, c) = -1
		answer = FF(answer)
	End While

	Return GF(answer, c)
End Function

Public Shared Function FindAllStates(text As String, keywords As String(), Optional lowestChar As Char = "a"c, Optional highestChar As Char = "z"c) As List(Of Integer)
	BuildMatchingMachine(keywords, lowestChar, highestChar)

	Dim currentState As Integer = 0
	Dim retVal As New List(Of Integer)()

	For i As Integer = 0 To text.Length - 1
		currentState = FindNextState(currentState, text(i), lowestChar)

		If Out(currentState) = 0 Then
			Continue For
		End If

		For j As Integer = 0 To keywords.Length - 1
			If Convert.ToBoolean(Out(currentState) And (1 << j)) Then
				retVal.Insert(0, i - keywords(j).Length + 1)
			End If
		Next
	Next

	Return retVal
End Function
								


Example

									Dim keywords As String() = {"he", "she", "hers", "his"}

Dim text As String = "ahishers"

Dim states As List(Of Integer) = FindAllStates(text, keywords)
								


Output

									4
3
4
1