Skip to content

Commit

Permalink
Merge pull request #19 from varnamproject/improve-search-symbol-table
Browse files Browse the repository at this point in the history
Improve search symbol table, add all config vars to varnam_config(), deprecated previous config functions
  • Loading branch information
subins2000 authored Dec 25, 2021
2 parents 8b58943 + 82f958d commit ddcabaa
Show file tree
Hide file tree
Showing 10 changed files with 120 additions and 33 deletions.
38 changes: 37 additions & 1 deletion c-shared.go
Original file line number Diff line number Diff line change
Expand Up @@ -271,22 +271,25 @@ func varnam_set_indic_digits(varnamHandleID C.int, val C.int) {
varnam_config(varnamHandleID, C.VARNAM_CONFIG_USE_INDIC_DIGITS, val)
}

// TODO move all config to varnam_config()
// varnam_set_dictionary_suggestions_limit sets the dictionary suggestions
// limit on the handle identified by varnamHandleID.
//
// Deprecated: use varnam_config() with
// VARNAM_CONFIG_SET_DICTIONARY_SUGGESTIONS_LIMIT instead.
//
//export varnam_set_dictionary_suggestions_limit
func varnam_set_dictionary_suggestions_limit(varnamHandleID C.int, val C.int) {
	// Forward to varnam_config() so the assignment lives in one place,
	// the same way varnam_set_indic_digits does.
	varnam_config(varnamHandleID, C.VARNAM_CONFIG_SET_DICTIONARY_SUGGESTIONS_LIMIT, val)
}

// varnam_set_pattern_dictionary_suggestions_limit sets the pattern dictionary
// suggestions limit on the handle identified by varnamHandleID.
//
// Deprecated: use varnam_config() with
// VARNAM_CONFIG_SET_PATTERN_DICTIONARY_SUGGESTIONS_LIMIT instead.
//
//export varnam_set_pattern_dictionary_suggestions_limit
func varnam_set_pattern_dictionary_suggestions_limit(varnamHandleID C.int, val C.int) {
	// Forward to varnam_config() so the assignment lives in one place,
	// the same way varnam_set_indic_digits does.
	varnam_config(varnamHandleID, C.VARNAM_CONFIG_SET_PATTERN_DICTIONARY_SUGGESTIONS_LIMIT, val)
}

// varnam_set_tokenizer_suggestions_limit sets the tokenizer suggestions limit
// on the handle identified by varnamHandleID.
//
// Deprecated: use varnam_config() with
// VARNAM_CONFIG_SET_TOKENIZER_SUGGESTIONS_LIMIT instead.
//
//export varnam_set_tokenizer_suggestions_limit
func varnam_set_tokenizer_suggestions_limit(varnamHandleID C.int, val C.int) {
	// Forward to varnam_config() so the assignment lives in one place,
	// the same way varnam_set_indic_digits does.
	varnam_config(varnamHandleID, C.VARNAM_CONFIG_SET_TOKENIZER_SUGGESTIONS_LIMIT, val)
}

// Deprecated. Use varnam_config()
//export varnam_set_dictionary_match_exact
func varnam_set_dictionary_match_exact(varnamHandleID C.int, val C.int) {
if val == 0 {
Expand Down Expand Up @@ -404,6 +407,27 @@ func varnam_get_vst_path(varnamHandleID C.int) *C.char {
return C.CString(handle.varnam.VSTPath)
}

// varnam_new_search_symbol builds a C Symbol (via makeSymbol) carrying the
// field values of govarnam.NewSearchSymbol() and stores its address in
// *resultPointer.
//
// It returns VARNAM_ERROR when resultPointer is NULL and VARNAM_SUCCESS
// otherwise.
//
// NOTE(review): the string fields are copied with C.CString, so the returned
// Symbol presumably has to be released on the C side — confirm which
// destructor callers are expected to use.
//
//export varnam_new_search_symbol
func varnam_new_search_symbol(resultPointer **C.struct_Symbol_t) C.int {
	// Writing through a NULL out-parameter would panic inside an exported
	// cgo function. Bail out before any C memory is allocated.
	if resultPointer == nil {
		return C.VARNAM_ERROR
	}

	symbol := govarnam.NewSearchSymbol()
	*resultPointer = C.makeSymbol(
		C.int(symbol.Identifier),
		C.int(symbol.Type),
		C.int(symbol.MatchType),
		C.CString(symbol.Pattern),
		C.CString(symbol.Value1),
		C.CString(symbol.Value2),
		C.CString(symbol.Value3),
		C.CString(symbol.Tag),
		C.int(symbol.Weight),
		C.int(symbol.Priority),
		C.int(symbol.AcceptCondition),
		C.int(symbol.Flags),
	)

	return C.VARNAM_SUCCESS
}

//export varnam_search_symbol_table
func varnam_search_symbol_table(varnamHandleID C.int, id C.int, searchCriteria C.struct_Symbol_t, resultPointer **C.varray) C.int {
ctx, cancel := makeContext(id)
Expand Down Expand Up @@ -633,6 +657,18 @@ func varnam_config(varnamHandleID C.int, key C.int, value C.int) C.int {
case C.VARNAM_CONFIG_IGNORE_DUPLICATE_TOKEN:
handle.varnam.VSTMakerConfig.IgnoreDuplicateTokens = cintToBool(value)
break
case C.VARNAM_CONFIG_SET_DICTIONARY_SUGGESTIONS_LIMIT:
handle.varnam.DictionarySuggestionsLimit = int(value)
break
case C.VARNAM_CONFIG_SET_PATTERN_DICTIONARY_SUGGESTIONS_LIMIT:
handle.varnam.PatternDictionarySuggestionsLimit = int(value)
break
case C.VARNAM_CONFIG_SET_TOKENIZER_SUGGESTIONS_LIMIT:
handle.varnam.TokenizerSuggestionsLimit = int(value)
break
case C.VARNAM_CONFIG_SET_DICTIONARY_MATCH_EXACT:
handle.varnam.DictionaryMatchExact = cintToBool(value)
break
}

return C.VARNAM_SUCCESS
Expand Down
13 changes: 9 additions & 4 deletions c-shared.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,15 @@
#define VARNAM_ERROR 2
#define VARNAM_CANCELLED 3

#define VARNAM_CONFIG_USE_DEAD_CONSONANTS 100
#define VARNAM_CONFIG_IGNORE_DUPLICATE_TOKEN 101
#define VARNAM_CONFIG_ENABLE_SUGGESTIONS 102
#define VARNAM_CONFIG_USE_INDIC_DIGITS 103
#define VARNAM_CONFIG_USE_DEAD_CONSONANTS 100
#define VARNAM_CONFIG_IGNORE_DUPLICATE_TOKEN 101
// VARNAM_CONFIG_ENABLE_SUGGESTIONS hasn't been implemented yet
#define VARNAM_CONFIG_ENABLE_SUGGESTIONS 102
#define VARNAM_CONFIG_USE_INDIC_DIGITS 103
#define VARNAM_CONFIG_SET_DICTIONARY_SUGGESTIONS_LIMIT 104
#define VARNAM_CONFIG_SET_PATTERN_DICTIONARY_SUGGESTIONS_LIMIT 105
#define VARNAM_CONFIG_SET_TOKENIZER_SUGGESTIONS_LIMIT 106
#define VARNAM_CONFIG_SET_DICTIONARY_MATCH_EXACT 107

typedef struct Suggestion_t {
char* Word;
Expand Down
6 changes: 6 additions & 0 deletions govarnam/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@ var (
VersionString string
)

// STRUCT_INT_DEFAULT_VALUE is the sentinel stored in the int fields of a
// search Symbol to mark them as "not set".
//
// Go zero-initialises struct ints to 0, but 0 can be a real value to search
// for, so it cannot double as "unset" in SearchSymbolTable. NewSearchSymbol
// therefore fills every int field with this value instead.
// https://stackoverflow.com/q/37135193/1372424
const STRUCT_INT_DEFAULT_VALUE = -1

/* General */
const ZWNJ = "\u200c"
const ZWJ = "\u200d"
Expand Down
4 changes: 4 additions & 0 deletions govarnam/govarnam.go
Original file line number Diff line number Diff line change
Expand Up @@ -469,6 +469,10 @@ func (varnam *Varnam) ReverseTransliterate(word string) ([]Suggestion, error) {

tokens := varnam.splitTextByConjunct(ctx, word)

if varnam.Debug {
fmt.Println(tokens)
}

for i, token := range tokens {
for j, symbol := range token.symbols {
tokens[i].symbols[j].Value1 = symbol.Pattern
Expand Down
14 changes: 10 additions & 4 deletions govarnam/govarnam_ml_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -206,19 +206,25 @@ func TestMLAtomicChil(t *testing.T) {

func TestMLReverseTransliteration(t *testing.T) {
varnam := getVarnamInstance("ml")
oldLimit := varnam.TokenizerSuggestionsLimit
varnam.TokenizerSuggestionsLimit = 30

sugs, err := varnam.ReverseTransliterate("മലയാളം")
checkError(err)

// The order of this will fail if VST weights change
expected := []string{"malayaaLam", "malayALam", "malayaalam", "malayAlam", "malayaLam", "malayalam"}
for i, sug := range sugs {
assertEqual(t, sug.Word, expected[i])
expected := []string{"malayaaLam", "malayaaLam_", "malayALam", "malayALam_", "malayaalam", "malayaalam_", "malayAlam", "malayAlam_", "malayaLam", "malayaLam_", "malayalam", "malayalam_"}

assertEqual(t, len(sugs), len(expected))
for i, expectedWord := range expected {
assertEqual(t, sugs[i].Word, expectedWord)
}

sugs, err = varnam.ReverseTransliterate("2019 ഏപ്രിൽ 17-ന് മലയാളം വിക്കിപീഡിയയിലെ ലേഖനങ്ങളുടെ എണ്ണം 63,000 പിന്നിട്ടു.")

assertEqual(t, sugs[0].Word, "2019 Epril 17-n~ malayaaLam vikkipeeDiyayile lEkhanangaLuTe eNNam 63,000 pinnittu.")

varnam.TokenizerSuggestionsLimit = oldLimit
}

func TestDictionaryLimit(t *testing.T) {
Expand Down Expand Up @@ -389,7 +395,7 @@ func TestMLExportAndImport(t *testing.T) {
func TestMLSearchSymbolTable(t *testing.T) {
varnam := getVarnamInstance("ml")

var search Symbol
search := NewSearchSymbol()
search.Value1 = "ക"
results, err := varnam.SearchSymbolTable(context.Background(), search)
checkError(err)
Expand Down
9 changes: 6 additions & 3 deletions govarnam/govarnam_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@ var testTempDir string

// AssertEqual checks if values are equal
// Thanks https://gist.github.com/samalba/6059502#gistcomment-2710184
func assertEqual(t *testing.T, a interface{}, b interface{}) {
if a == b {
func assertEqual(t *testing.T, value interface{}, expected interface{}) {
if value == expected {
return
}
debug.PrintStack()
t.Errorf("Received %v (type %v), expected %v (type %v)", a, reflect.TypeOf(a), b, reflect.TypeOf(b))
t.Errorf("Received %v (type %v), expected %v (type %v)", value, reflect.TypeOf(value), expected, reflect.TypeOf(expected))
}

func checkError(err error) {
Expand Down Expand Up @@ -65,6 +65,9 @@ func getVarnamInstance(schemeID string) *Varnam {
if ok {
return instance
}

log.Fatalf("Varnam instance for %s not found", schemeID)

return nil
}

Expand Down
21 changes: 19 additions & 2 deletions govarnam/symbol.go
Original file line number Diff line number Diff line change
Expand Up @@ -512,6 +512,23 @@ func removeNonExactTokens(tokens []Token) []Token {
return tokens
}

// NewSearchSymbol returns a Symbol meant to be used as search criteria.
//
// Every int field is preset to STRUCT_INT_DEFAULT_VALUE rather than Go's zero
// value, because 0 can be a legitimate value to search for and so cannot mean
// "field not set". All other fields keep their zero value.
// https://stackoverflow.com/q/37135193/1372424
func NewSearchSymbol() Symbol {
	return Symbol{
		Identifier:      STRUCT_INT_DEFAULT_VALUE,
		Type:            STRUCT_INT_DEFAULT_VALUE,
		MatchType:       STRUCT_INT_DEFAULT_VALUE,
		Weight:          STRUCT_INT_DEFAULT_VALUE,
		Priority:        STRUCT_INT_DEFAULT_VALUE,
		AcceptCondition: STRUCT_INT_DEFAULT_VALUE,
		Flags:           STRUCT_INT_DEFAULT_VALUE,
	}
}

// SearchSymbolTable For searching symbol table
func (varnam *Varnam) SearchSymbolTable(ctx context.Context, searchCriteria Symbol) ([]Symbol, error) {
var (
Expand All @@ -534,7 +551,7 @@ func (varnam *Varnam) SearchSymbolTable(ctx context.Context, searchCriteria Symb
return
}
} else {
if val.(int) == 0 {
if valInt, ok := val.(int); !ok || valInt == STRUCT_INT_DEFAULT_VALUE {
return
}
}
Expand Down Expand Up @@ -587,7 +604,7 @@ func (varnam *Varnam) SearchSymbolTable(ctx context.Context, searchCriteria Symb
}

func (varnam *Varnam) getVirama() (string, error) {
var viramaSymbol Symbol
viramaSymbol := NewSearchSymbol()
viramaSymbol.Pattern = "~"
results, _ := varnam.SearchSymbolTable(context.Background(), viramaSymbol)

Expand Down
7 changes: 4 additions & 3 deletions govarnam/vst_maker.go
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ func (varnam *Varnam) vmPersistToken(pattern string, value1 string, value2 strin
return fmt.Errorf("arguments invalid")
}

persisted, err := varnam.vmAlreadyPersisted(pattern, value1, matchType)
persisted, err := varnam.vmAlreadyPersisted(pattern, value1, matchType, acceptCondition)
if err != nil {
return err
}
Expand Down Expand Up @@ -260,9 +260,10 @@ func (varnam *Varnam) vmPersistToken(pattern string, value1 string, value2 strin
return nil
}

func (varnam *Varnam) vmAlreadyPersisted(pattern string, value1 string, matchType int) (bool, error) {
var searchCriteria Symbol
func (varnam *Varnam) vmAlreadyPersisted(pattern string, value1 string, matchType int, acceptCondition int) (bool, error) {
searchCriteria := NewSearchSymbol()
searchCriteria.Pattern = pattern
searchCriteria.AcceptCondition = acceptCondition

if matchType == VARNAM_MATCH_EXACT {
searchCriteria.MatchType = matchType
Expand Down
39 changes: 24 additions & 15 deletions govarnamgo/govarnamgo.go
Original file line number Diff line number Diff line change
Expand Up @@ -633,6 +633,29 @@ func (handle *VarnamHandle) GetVSTPath() string {
return C.GoString(cStr)
}

// makeGoSymbol copies a C Symbol into its Go counterpart.
//
// Integer fields are converted with int() and string fields are copied into
// Go strings with C.GoString. cSymbol itself is neither modified nor freed
// here.
func makeGoSymbol(cSymbol *C.Symbol) Symbol {
	return Symbol{
		Identifier:      int(cSymbol.Identifier),
		Type:            int(cSymbol.Type),
		MatchType:       int(cSymbol.MatchType),
		Pattern:         C.GoString(cSymbol.Pattern),
		Value1:          C.GoString(cSymbol.Value1),
		Value2:          C.GoString(cSymbol.Value2),
		Value3:          C.GoString(cSymbol.Value3),
		Tag:             C.GoString(cSymbol.Tag),
		Weight:          int(cSymbol.Weight),
		Priority:        int(cSymbol.Priority),
		AcceptCondition: int(cSymbol.AcceptCondition),
		Flags:           int(cSymbol.Flags),
	}
}

// NewSearchSymbol returns a Symbol to be used as search criteria, populated
// with the defaults supplied by the C API's varnam_new_search_symbol.
//
// NOTE(review): the C Symbol obtained here is not released in this function —
// confirm whether it needs to be freed once it has been converted.
func NewSearchSymbol() Symbol {
	var cSymbol *C.Symbol
	// The status code returned by the C call is discarded.
	C.varnam_new_search_symbol(&cSymbol)
	return makeGoSymbol(cSymbol)
}

// SearchSymbolTable search VST
func (handle *VarnamHandle) SearchSymbolTable(ctx context.Context, searchCriteria Symbol) []Symbol {
var goResults []Symbol
Expand Down Expand Up @@ -671,21 +694,7 @@ func (handle *VarnamHandle) SearchSymbolTable(ctx context.Context, searchCriteri
for i < int(C.varray_length(resultPointer)) {
result := (*C.Symbol)(C.varray_get(resultPointer, C.int(i)))

var goResult Symbol
goResult.Identifier = int(result.Identifier)
goResult.Type = int(result.Type)
goResult.MatchType = int(result.MatchType)
goResult.Pattern = C.GoString(result.Pattern)
goResult.Value1 = C.GoString(result.Value1)
goResult.Value2 = C.GoString(result.Value2)
goResult.Value3 = C.GoString(result.Value3)
goResult.Tag = C.GoString(result.Tag)
goResult.Weight = int(result.Weight)
goResult.Priority = int(result.Priority)
goResult.AcceptCondition = int(result.AcceptCondition)
goResult.Flags = int(result.Flags)

goResults = append(goResults, goResult)
goResults = append(goResults, makeGoSymbol(result))
i++
}

Expand Down
2 changes: 1 addition & 1 deletion govarnamgo/govarnamgo_ml_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ func TestRecentlyLearnedWords(t *testing.T) {
func TestSearchSymbolTable(t *testing.T) {
varnam := getVarnamInstance("ml")

var symbol Symbol
symbol := NewSearchSymbol()
symbol.Pattern = "la"
result := varnam.SearchSymbolTable(context.Background(), symbol)

Expand Down

0 comments on commit ddcabaa

Please sign in to comment.