# Optimal Mismatch Algorithm

This algorithm works by scanning pattern characters from the least frequent one to the most frequent one. Doing so one may hope to have a mismatch most of the times and thus to scan the whole text very quickly. One needs to know the frequencies of each of the character of the alphabet.

```									```private static char[] _x, _y;
private static int _m, _n;
private static int[] _adaptedGs, _qsBc, _frequency;
private static Pattern[] _pattern;

private static void OrderPattern(char[] x, Pattern[] pat, int[] freq)
{
for (int i = 0; i < x.Length; ++i)
{
Pattern ptrn = new Pattern();
ptrn.Location = i;
ptrn.Character = x[i];
pat[i] = ptrn;
}

QuickSortPattern(pat, 0, x.Length - 1, freq);
}

private static void QuickSortPattern(Pattern[] pat, int low, int n, int[] freq)
{
int lo = low;
int hi = n;

if (lo >= n) return;

Pattern mid = pat[(lo + hi) / 2];

while (lo < hi)
{
while (lo < hi && OptimalPatternCompare(pat[lo], mid, freq) < 0) lo++;
while (lo < hi && OptimalPatternCompare(pat[hi], mid, freq) > 0) hi--;

if (lo < hi)
{
Pattern temp = pat[lo];
pat[lo] = pat[hi];
pat[hi] = temp;
}
}

if (hi < lo)
{
int temp = hi;
hi = lo;
lo = temp;
}

QuickSortPattern(pat, low, lo, freq);
QuickSortPattern(pat, lo == low ? lo + 1 : lo, n, freq);
}

private static int OptimalPatternCompare(Pattern pat1, Pattern pat2, int[] freq)
{
int fx = freq[pat1.Character] - freq[pat2.Character];
return (fx != 0 ? (fx > 0 ? 1 : -1) : (pat2.Location - pat1.Location));
}

private static int MatchShift(char[] x, int ploc, int lShift, Pattern[] pat)
{
int i, j;

for (; lShift < x.Length; ++lShift)
{
i = ploc;
while (--i >= 0)
{
if ((j = (pat[i].Location - lShift)) < 0) continue;
if (pat[i].Character != x[j]) break;
}
if (i < 0) break;
}

return (lShift);
}

{
int lShift, i, pLoc;

for (pLoc = 1; pLoc <= x.Length; ++pLoc)
{
lShift = MatchShift(x, pLoc, lShift, pat);
}

for (pLoc = 0; pLoc < x.Length; ++pLoc)
{
while (lShift < x.Length)
{
i = pat[pLoc].Location - lShift;
if (i < 0 || pat[pLoc].Character != x[i]) break;
++lShift;
lShift = MatchShift(x, pLoc, lShift, pat);
}
}
}

private static int[] CalculateCharFrequency(char[] x, char[] y, int z)
{
int i;
int[] freq = new int[z];
for (i = 0; i < x.Length; i++) freq[x[i]]++;
for (i = 0; i < y.Length; i++) freq[y[i]]++;
return freq;
}

private static void PreQsBc(char[] x, int[] qsBc)
{
int i, m = x.Length;

for (i = 0; i < qsBc.Length; ++i)
qsBc[i] = m + 1;

for (i = 0; i < m; ++i)
qsBc[x[i]] = m - i;
}

private static void SetupOptimalSearch()
{
OrderPattern(_x, _pattern, _frequency);
PreQsBc(_x, _qsBc);
}

public static void InitOptimalSearch(string pattern, string source)
{
_x = pattern.ToCharArray();
_y = source.ToCharArray();
_m = _x.Length;
_n = _y.Length;
_adaptedGs = new int[_m + 1];
_qsBc = new int[65536];
_frequency = CalculateCharFrequency(_x, _y, 65536);
_pattern = new Pattern[_m];
}

public static Result FindAll()
{
int i, j;
List<int> result = new List<int>();
SetupOptimalSearch();

j = 0;
int jOld = 0;
while (j <= _n - _m)
{
i = 0;
while (i < _m && _pattern[i].Character == _y[j + _pattern[i].Location]) ++i;

if (i >= _m)

jOld = j;
if (j < _n - _m)
j += Math.Max(_adaptedGs[i], _qsBc[_y[j + _m]]);
else
}

return new Result(jOld, result);
}

public struct Result
{
public int Comp;
public List<int> Indexes;

public Result(int comp, List<int> indexes)
{
Comp = comp;
Indexes = indexes;
}
}

public struct Pattern
{
public int Location;
public char Character;
}```
```

### Example

```									```string source = "GCATCGCAGAGAGTATACAGTACG";
string pattern = "GCAGAGAG";
InitOptimalSearch(pattern, source);
Result result = FindAll();```
```

### Output

```									```result {
Comp: 14
Indexes: { 5 }
}```
```