Python Code

import unicodedata

def split_clusters(txt):
    """Generate grapheme clusters for Devanagari text.

    A character is appended to the current cluster when it is a combining
    mark (Unicode general category starting with ``M``), or when it is an
    "other letter" (category ``Lo``) that immediately follows a character
    whose Unicode name ends in " SIGN VIRAMA". Any other character closes
    the current cluster and starts a new one.

    Args:
        txt: The text to split; it is iterated character by character.

    Yields:
        Each non-empty cluster, in order of appearance.
    """
    cluster = u''
    end = False  # True when the previous character was a virama sign.

    for char in txt:
        category = unicodedata.category(char)
        if (category == 'Lo' and end) or category[0] == 'M':
            cluster = cluster + char
        else:
            if cluster:
                yield cluster
            cluster = char
        # Supply a default name: unicodedata.name() raises ValueError for
        # characters that have no name (e.g. control characters like a
        # newline), which would otherwise abort the whole generator.
        end = unicodedata.name(char, '').endswith(' SIGN VIRAMA')

    if cluster:
        yield cluster

Go Code

import (
    "strings"
    "unicode"
    "golang.org/x/text/unicode/runenames"
)

// splitClusters breaks txt into grapheme-like clusters for Devanagari text.
//
// A rune is appended to the cluster under construction when it is a combining
// mark (Unicode category M), or when it is an "other letter" (category Lo)
// that directly follows a rune whose Unicode name ends in " SIGN VIRAMA".
// Any other rune closes the pending cluster and opens a new one. Clusters
// that are empty or consist solely of white space are left out of the result.
func splitClusters(txt string) (ret []string) {
    var (
        pending     string
        afterVirama bool
    )

    // emit appends the pending cluster unless it is empty or only white space.
    emit := func() {
        if strings.TrimSpace(pending) != "" {
            ret = append(ret, pending)
        }
    }

    for _, r := range txt {
        switch {
        case unicode.IsMark(r), afterVirama && unicode.Is(unicode.Lo, r):
            pending += string(r)
        default:
            emit()
            pending = string(r)
        }
        // Remember whether this rune is a virama so the next letter can join.
        afterVirama = strings.HasSuffix(runenames.Name(r), " SIGN VIRAMA")
    }
    emit()

    return ret
}

Python Code

Go Code

Python Code

Go Code