package com.boyter.SpellingCorrector; import java.util.*; import java.util.stream.Stream; /** * A simple spell checker based on a few implementations such as the infamous Peter Noving spell checker and * the like. Attempts to be highly performing by never changing the first character since we can assume that the * user got that correct. */ public class SpellingCorrector implements ISpellingCorrector { // word to count map - how may times a word is present - or a weight attached to a word private Mapdictionary = null; public SpellingCorrector(int lruCount) { this.dictionary = Collections.synchronizedMap(new LruCache<>(lruCount)); } @Override public void putWord(String word) { word = word.toLowerCase(); if (dictionary.containsKey(word)) { dictionary.put(word, (dictionary.get(word) + 1)); } else { dictionary.put(word, 1); } } @Override public String correct(String word) { if (word == null || word.trim().isEmpty()) { return word; } word = word.toLowerCase(); // If the word exists in our dictionary then return if (dictionary.containsKey(word)) { return word; } Map possibleMatches = new HashMap<>(); List closeEdits = wordEdits(word); for (String closeEdit: closeEdits) { if (dictionary.containsKey(closeEdit)) { possibleMatches.put(closeEdit, this.dictionary.get(closeEdit)); } } if (!possibleMatches.isEmpty()) { // Sorted least likely first Object[] matches = this.sortByValue(possibleMatches).keySet().toArray(); // Try to match anything of the same length first String bestMatch = ""; for(Object o: matches) { if (o.toString().length() == word.length()) { bestMatch = o.toString(); } } if (!bestMatch.trim().isEmpty()) { return bestMatch; } // Just return whatever is the best match return matches[matches.length - 1].toString(); } // Ok we did"t find anything, so lets run the edits function on the previous results and use those // this gives us results which are 2 characters away from whatever was entered List furtherEdits = new ArrayList<>(); for(String closeEdit: closeEdits) { furtherEdits.addAll(this.wordEdits(closeEdit)); } for (String futherEdit: furtherEdits) { if (dictionary.containsKey(futherEdit)) { possibleMatches.put(futherEdit, this.dictionary.get(futherEdit)); } } if (!possibleMatches.isEmpty()) { // Sorted least likely first Object[] matches = this.sortByValue(possibleMatches).keySet().toArray(); // Try to match anything of the same length first String bestMatch = ""; for(Object o: matches) { if (o.toString().length() == word.length()) { bestMatch = o.toString(); } } if (!bestMatch.trim().isEmpty()) { return bestMatch; } // Just return whatever is the best match return matches[matches.length - 1].toString(); } // If unable to find something better return the same string return word; } @Override public boolean containsWord(String word) { if (dictionary.containsKey(word)) { return true; } return false; } /** * Return a list of strings which are words similar to our one which could potentially be misspellings * Abuse the fact that a char can be used as an integer * Assume that they got the first letter correct for all edits to cut on CPU burn time */ private List wordEdits(String word) { List closeWords = new ArrayList (); for (int i = 1; i < word.length() + 1; i++) { for (char character = "a"; character <= "z"; character++) { // Maybe they forgot to type a letter? Try adding one StringBuilder sb = new StringBuilder(word); sb.insert(i, character); closeWords.add(sb.toString()); } } for (int i = 1; i < word.length(); i++) { for (char character = "a"; character <= "z"; character++) { // Maybe they mistyped a single letter? Try replacing them all StringBuilder sb = new StringBuilder(word); sb.setCharAt(i, character); closeWords.add(sb.toString()); // Maybe they added an extra letter? Try deleting one sb = new StringBuilder(word); sb.deleteCharAt(i); closeWords.add(sb.toString()); } } return closeWords; } /** * Sorts a map by value taken from * http://stackoverflow.com/questions/109383/sort-a-mapkey-value-by-values-java */ public static > Map sortByValue( Map map ) { Map result = new LinkedHashMap<>(); Stream > st = map.entrySet().stream(); st.sorted( Map.Entry.comparingByValue() ).forEachOrdered( e -> result.put(e.getKey(), e.getValue()) ); return result; } /** * A very simple LRU cache implementation that can be used for random data types. */ public class LruCache extends LinkedHashMap { private final int maxEntries; public LruCache(final int maxEntries) { super(maxEntries + 1, 1.0f, true); this.maxEntries = maxEntries; } @Override protected boolean removeEldestEntry(final Map.Entry eldest) { return super.size() > maxEntries; } } }
Related articles
scc 6
package com.boyter.SpellingCorrector; import java.util.*; import java.util.stream.Stream; /** * A simple spell checker based on a few implementations such as the infamous Peter Noving spell checker and * the like. Attempts to be highly performing by
scc 13
package com.boyter.SpellingCorrector; import java.util.*; import java.util.stream.Stream; /** * A simple spell checker based on a few implementations such as the infamous Peter Noving spell checker and * the like. Attempts to be highly performing by
scc bosque
//------------------------------------------------------------------------------------------------------- // Copyright (C) Microsoft. All rights reserved. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license inform
scc flow9
import runtime; /* If we list all the natural numbers below 10 that are multiples of 3 or 5, we get 3, 5, 6 and 9. The sum of these multiples is 23. Find the sum of all the multiples of 3 or 5 below 1000. */ euler1(limit : int) -> int { foldRange
scc app 5cddf2000f4491a89a40
(window.webpackJsonp=window.webpackJsonp||[]).push([[2],[function(e,t,n){"use strict";e.exports=n(184)},function(e,t,n){n(21),n(5),n(4),n(6),n(60),n(29),n(3),n(59),n(13);var r,o,a,i=(r=0,o={util:{encode:function(e){return e instanceof a?new a(e.type,o.ut
scc qsharp
operation BellTest (count : Int, initial: Result) : (Int, Int, Int) { mutable numOnes = 0; mutable agree = 0; using ((q0, q1) = (Qubit(), Qubit())) { for (test in 1..count) { Set (initial, q0); Set (Zer
scc bootstrap grid.min
/*! * Bootstrap Grid v4.3.1 (https://getbootstrap.com/) * Copyright 2011-2019 The Bootstrap Authors * Copyright 2011-2019 Twitter, Inc. * Licensed under MIT (https://github.com/twbs/bootstrap/blob/master/LICENSE) */html{box-sizing:border-box;-ms-ove
scc racket
#lang racket/base (require racket/private/norm-arity) (provide normalize-arity normalized-arity? arity=? arity-includes?) (define (normalized-arity? a) (or (null? a) (arity? a) (and (list? a) ((length a) . >= . 2) (
scc matlab
function [net,stats] = cnn_train_dag(net, imdb, getBatch, varargin) %CNN_TRAIN_DAG Demonstrates training a CNN using the DagNN wrapper % CNN_TRAIN_DAG() is similar to CNN_TRAIN(), but works with % the DagNN wrapper instead of the SimpleNN wrapper.
scc 4
package com.boyter.SpellingCorrector; import java.util.*; import java.util.stream.Stream; /** * A simple spell checker based on a few implementations such as the infamous Peter Noving spell checker and * the like. Attempts to be highly performing by