Split Indic Words
# Python Code
import unicodedata


def split_clusters(txt):
    """Yield the grapheme clusters of Indic (e.g. Devanagari) text.

    A cluster starts at any character that is not a combining mark and
    then absorbs:

    * every following character whose Unicode general category is a
      mark (``Mn``, ``Mc`` or ``Me``) -- vowel signs, nukta, anusvara,
      virama, and so on;
    * a letter (category ``Lo``) that comes immediately after a virama,
      so that conjuncts stay in one cluster.

    Args:
        txt: The text to split, as a Unicode string.

    Yields:
        Consecutive non-empty substrings of ``txt``; concatenating them
        gives back ``txt`` unchanged.
    """
    cluster = u''
    after_virama = False
    for char in txt:
        category = unicodedata.category(char)
        if category[0] == 'M' or (category == 'Lo' and after_virama):
            # Combining mark, or a consonant joined by the preceding virama.
            cluster += char
        else:
            if cluster:
                yield cluster
            cluster = char
        # Updated for every character: the virama itself is a mark (Mn) and
        # therefore always takes the "append" branch above.
        # unicodedata.name() raises ValueError for code points that have no
        # name (newline, tab, other controls, unassigned code points), so a
        # default is supplied to keep such input from aborting the split.
        after_virama = unicodedata.name(char, u'').endswith(' SIGN VIRAMA')
    if cluster:
        yield cluster


# Go Code (cut off mid-function in this excerpt; kept verbatim below)
# import ( "strings" "unicode" "golang.org/x/text/unicode/runenames" )
# func splitClusters(txt string) (ret []string) { cluster := "" end := false for _, x := range txt { if (unicode....