defining event paths

5 years ago · afd6577c9f
parent 8cff8b3cd3
commit afd6577c9f
2 changed files with 64 additions and 8 deletions
--- a/path.go
+++ b/path.go
@@ -40,7 +40,7 @@ func (p *Path) String() string {
 // constructing a hierarchical path.
 func (p *Path) Child(name string) *Path {
 	return &Path{
-		name:   name,
+		name:   MakeSafeName(name),
 		parent: p,
 	}
 }
@@ -48,21 +48,24 @@ func (p *Path) Child(name string) *Path {
 // NewPath creates a new path with a given root name. The supplied name string
 // is cleaned of unsafe characters.
 func NewPath(name string) *Path {
-	return &Path{name: name}
+	return &Path{name: MakeSafeName(name)}
 }

 // IsSafeName determines whether a provided path name is considered to be a
 // "safe" name by the standards of blammo. A safe name in the context of blammo
 // is a name that consists of only unicode letters and numbers, plus the hyphen
-// and underscore characters. Note that the character classes being tested
-// against are unicode letters and numbers, not ascii letters and numbers;
-// letters with accents and letters that do not appear in English are
-// permitted. The purpose of the safe name guarantees is to ensure that logs
-// written by blammo are written with the expectation of being parseable.
+// (-), underscore (_), and colon (:) characters. Note that the character
+// classes being tested against are unicode letters and numbers, not ascii
+// letters and numbers; letters with accents and letters that do not appear in
+// English are permitted.
+//
+// The goal of the safe name checker is to ensure that logs written by
+// blammo can be written in any (human) language while maintaining a few rules
+// to ensure the logs can be reasonably straightforward to parse and search.
 func IsSafeName(name string) bool {
 	runes := []rune(name)
 	for _, r := range runes {
-		if r == '-' || r == '_' {
+		if r == '-' || r == '_' || r == ':' {
 			continue
 		}
 		if !unicode.In(r, unicode.Letter, unicode.Number) {
@@ -71,3 +74,31 @@ func IsSafeName(name string) bool {
 	}
 	return true
 }
+
+// MakeSafeName takes a string and transforms it, if necessary, into a string
+// that is considered to be a safe name. The transformation strips all leading
+// and trailing whitespace, converts intermediate spacing characters into
+// underscores, and converts other unsafe characters into hyphens. Letters,
+// numbers, hyphens, underscores, and colons are kept unchanged, so a name that
+// is already safe is returned as-is. The rune classes used here must stay in
+// sync with IsSafeName.
+func MakeSafeName(name string) string {
+	return strings.Map(func(r rune) rune {
+		switch {
+		case r == '-' || r == '_' || r == ':':
+			// These are safe and must be kept. Dropping them would silently
+			// merge distinct names such as "a-b" and "ab".
+			return r
+		case unicode.In(r, unicode.Letter, unicode.Number):
+			return r
+		case unicode.IsSpace(r):
+			// Only interior whitespace reaches this point; the ends were
+			// already trimmed before mapping.
+			return '_'
+		default:
+			// Anything else (e.g. punctuation or symbols such as '/') is
+			// replaced with a hyphen rather than removed.
+			return '-'
+		}
+	}, strings.TrimSpace(name))
+}
--- a/path_test.go
+++ b/path_test.go
@@ -19,6 +19,11 @@ func TestPath(t *testing.T) {
 	if p.String() != "alice/bob/carol" {
 		t.Error("bad grandchild generation")
 	}
+
+	p = p.Child(" dave ")
+	if p.String() != "alice/bob/carol/dave" {
+		t.Error("bad sanitation transformation")
+	}
 }

 func TestSafeNames(t *testing.T) {
@@ -28,7 +33,15 @@ func TestSafeNames(t *testing.T) {
 		"1one",
 		"niño",
 		"garçon",
+		"alice-bob",
+		"alice_bob",
+		"alice:bob",
 		"你好",
+		// meant to hold a zero-width non-joiner (U+200C, decimal 8204), but 0x8204
+		// is hex: a CJK ideograph, so a letter. A real U+200C is a format character
+		// and IsSafeName would reject it; it's necessary for some languages, but
+		// has the propensity to create confusing homoglyph situations.
+		string([]rune{'o', 0x8204, 'n', 'e'}),
 		"",
 	}

@@ -37,4 +50,16 @@ func TestSafeNames(t *testing.T) {
 			t.Errorf("expected safe name is considered unsafe: %s", n)
 		}
 	}
+
+	unsafeNames := []string{
+		" one",
+		"one ",
+		"alice/bob",
+		"alice bob",
+	}
+	for _, n := range unsafeNames {
+		if IsSafeName(n) {
+			t.Errorf("expected unsafe name is considered safe: %s", n)
+		}
+	}
 }