AdGuardDNS/internal/agd/humanid.go
Andrey Meshkov 41f7e6cb22 Sync v2.8.0
2024-07-10 19:49:07 +03:00

280 lines
6.5 KiB
Go

package agd
import (
"bytes"
"fmt"
"strings"
"unicode/utf8"
"github.com/AdguardTeam/golibs/errors"
"github.com/AdguardTeam/golibs/netutil"
"github.com/AdguardTeam/golibs/syncutil"
)
// HumanID is a more human-readable identifier of a device.  Values should be
// created with [NewHumanID] or [HumanIDParser.ParseNormalized], which validate
// the string, rather than with a plain type conversion.
type HumanID string
const (
// MaxHumanIDLen is the maximum length of a human-readable device ID.  It is
// set to [netutil.MaxDomainLabelLen] and is compared against the length of
// the ID in bytes.
MaxHumanIDLen = netutil.MaxDomainLabelLen
// MinHumanIDLen is the minimum length of a human-readable device ID.  It is
// compared against the length of the ID in bytes.
MinHumanIDLen = 1
)
// NewHumanID validates s and returns it as a HumanID.  Prefer it to a plain
// type conversion, since the conversion performs no validation.  If s is not a
// valid human-readable ID, id is empty and err describes the problem, prefixed
// with the offending string.
//
// TODO(a.garipov): Remove if it remains unused.
func NewHumanID(s string) (id HumanID, err error) {
	id, err = newHumanID(s)
	if err != nil {
		// Wrap explicitly and only on failure; [errors.Annotate] is avoided
		// here, because it allocates even when the error is nil.
		return "", fmt.Errorf("bad human id %q: %w", s, err)
	}

	return id, nil
}
// newHumanID validates s and returns it as a HumanID.  The checks, in order,
// are: the byte length is within [MinHumanIDLen, MaxHumanIDLen], s is a valid
// hostname label, and s contains no run of three hyphens.
//
// The returned errors are intentionally left unwrapped, so that callers can add
// their own context without paying for an extra layer of wrapping.
func newHumanID(s string) (id HumanID, err error) {
	if err = ValidateInclusion(len(s), MaxHumanIDLen, MinHumanIDLen, UnitByte); err != nil {
		// The caller is responsible for wrapping.
		return "", err
	}

	// TODO(a.garipov): Add boolean versions to netutil to avoid allocations of
	// errors that aren't used.
	if err = netutil.ValidateHostnameLabel(s); err != nil {
		// The caller is responsible for wrapping.
		return "", err
	}

	tripleIdx := strings.Index(s, "---")
	if tripleIdx >= 0 {
		return "", fmt.Errorf("at index %d: max 2 consecutive hyphens are allowed", tripleIdx)
	}

	return HumanID(s), nil
}
// HumanIDLower is the type for [HumanID] values that must be lowercase.  Use
// [NewHumanIDLower] to validate a string, or [HumanIDToLower] to convert an
// existing [HumanID].
type HumanIDLower string
// NewHumanIDLower validates s as a human-readable ID that contains no ASCII
// uppercase letters and returns it as a HumanIDLower.  Prefer it to a plain
// type conversion.  On failure, id is empty and err names the offending string
// and, for case violations, the index and the rune.
func NewHumanIDLower(s string) (id HumanIDLower, err error) {
	// The error is wrapped by hand and only on failure; [errors.Annotate] is
	// avoided, because it allocates even when the error is nil.
	validated, err := newHumanID(s)
	if err != nil {
		return "", fmt.Errorf("bad lowercase human id %q: %w", s, err)
	}

	for pos, c := range validated {
		if c < 'A' || c > 'Z' {
			continue
		}

		return "", fmt.Errorf(
			"bad lowercase human id %q: at index %d: %q is not lowercase",
			s,
			pos,
			c,
		)
	}

	return HumanIDLower(validated), nil
}
// HumanIDToLower converts id to its lowercase form using [strings.ToLower].
func HumanIDToLower(id HumanID) (lower HumanIDLower) {
	lowered := strings.ToLower(string(id))

	return HumanIDLower(lowered)
}
// HumanIDParser normalizes and parses a HumanID from a string.  Create it with
// [NewHumanIDParser]; the pool of a zero-value HumanIDParser is nil.
type HumanIDParser struct {
// pool supplies the scratch buffers that ParseNormalized hands to the
// normalizer and returns once parsing is done.
pool *syncutil.Pool[bytes.Buffer]
}
// NewHumanIDParser returns a HumanIDParser backed by a pool of byte buffers.
// Each new buffer is preallocated with a capacity of
// [netutil.MaxDomainNameLen] bytes, which is also the longest input that
// ParseNormalized accepts for normalization.
func NewHumanIDParser() (p *HumanIDParser) {
	newBuf := func() (buf *bytes.Buffer) {
		scratch := make([]byte, 0, netutil.MaxDomainNameLen)

		return bytes.NewBuffer(scratch)
	}

	p = &HumanIDParser{}
	p.pool = syncutil.NewPool(newBuf)

	return p
}
// ParseNormalized parses a HumanID from s, normalizing s first if it is not
// already valid, for example when it contains unsupported symbols.  The
// normalization is best-effort: if it cannot produce a valid ID, id is empty
// and err is non-nil and mentions the original, non-normalized string.
func (p *HumanIDParser) ParseNormalized(s string) (id HumanID, err error) {
	// Fast path: the input is already a valid ID, so no buffer is needed.
	if id, err = newHumanID(s); err == nil {
		return id, nil
	}

	// Every failure below is reported against the original input.  The wrap is
	// done by hand and only on failure; [errors.Annotate] is avoided, because
	// it allocates even when the error is nil.
	const wrapFmt = "bad non-normalized human id %q: %w"

	// Reject inputs that exceed the upper DNS hostname-length limit before
	// doing any normalization work.
	err = ValidateInclusion(len(s), netutil.MaxDomainNameLen, MinHumanIDLen, UnitByte)
	if err != nil {
		return "", fmt.Errorf(wrapFmt, s, err)
	}

	buf := p.pool.Get()
	defer p.pool.Put(buf)

	buf.Reset()

	norm := humanIDNormalizer{buf: buf}

	// Ranging over a string decodes it rune by rune and yields
	// [utf8.RuneError] for each invalid byte, which the normalizer treats like
	// any other unsupported rune.
	for _, r := range s {
		norm.next(r)
	}

	normalized := norm.result()
	if normalized == "" || normalized == "-" {
		return "", fmt.Errorf(wrapFmt, s, errors.Error("cannot normalize"))
	}

	id, err = newHumanID(normalized)
	if err != nil {
		return "", fmt.Errorf(wrapFmt, s, err)
	}

	return id, nil
}
// humanIDNormalizer is a stateful normalizer of human-readable device
// identifiers.  Runes are fed one at a time through next, and the normalized
// string is obtained from result.
type humanIDNormalizer struct {
// buf accumulates the normalized output.
buf *bytes.Buffer
// state is one of the humanIDNormState* constants and selects how next
// handles the incoming rune.
state uint8
// prevRune is the rune most recently passed to write.  It is set to
// [utf8.RuneError] when trailing hyphens are removed from buf.
prevRune rune
// prevPrevRune is the value prevRune had before the most recent write.  It
// is set to [utf8.RuneError] when two trailing hyphens are removed from buf.
prevPrevRune rune
}
// [humanIDNormalizer] states.
const (
// humanIDNormStateInitial is the state before any rune has been written.
// It is the zero value, so a humanIDNormalizer created without an explicit
// state starts here.
humanIDNormStateInitial uint8 = iota
// humanIDNormStateInvalid is the state entered after an unsupported rune or
// a third consecutive hyphen; runes are skipped until a valid outer
// hostname rune is seen.
humanIDNormStateInvalid
// humanIDNormStateValid is the state in which the last processed rune was
// written to the buffer.
humanIDNormStateValid
)
// next feeds r into the normalizer, dispatching to the handler for the current
// state.  Whether r ends up in the buffer is decided by that handler.  It
// panics if the state is not one of the known humanIDNormState* values.
func (p *humanIDNormalizer) next(r rune) {
	if p.state == humanIDNormStateInitial {
		p.nextInitial(r)

		return
	}

	if p.state == humanIDNormStateValid {
		p.nextValid(r)

		return
	}

	if p.state == humanIDNormStateInvalid {
		p.nextInvalid(r)

		return
	}

	panic(fmt.Errorf("bad humanIDNormalizer state %d", p.state))
}
// nextInitial handles r while nothing has been written yet.  Leading runes
// that cannot start a hostname label are dropped; the first one that can is
// written and moves the normalizer into the valid state.
func (p *humanIDNormalizer) nextInitial(r rune) {
	if netutil.IsValidHostOuterRune(r) {
		p.write(r)
		p.state = humanIDNormStateValid
	}
}
// nextValid handles r while the normalizer is in the valid state.  Hyphens and
// valid outer hostname runes are written as is.  A third consecutive hyphen or
// an unsupported rune removes any trailing hyphens from the buffer and moves
// the normalizer into the invalid state.
func (p *humanIDNormalizer) nextValid(r rune) {
	switch {
	case r == '-' && p.prevRune == '-' && p.prevPrevRune == '-':
		// A third hyphen in a row: drop the two already written.  With both
		// previous runes being hyphens, truncateHyphens removes exactly two
		// bytes and resets both previous runes.
		p.truncateHyphens()
		p.state = humanIDNormStateInvalid
	case r == '-', netutil.IsValidHostOuterRune(r):
		p.write(r)
	default:
		// An unsupported rune: make sure that the buffer doesn't end with
		// hyphens, since a single separator is written on recovery.
		p.truncateHyphens()
		p.state = humanIDNormStateInvalid
	}
}
// truncateHyphens drops up to two trailing hyphens from the buffer, based on
// the tracked previous runes, and resets the tracked runes that corresponded
// to the removed hyphens.  It does nothing if the previous rune is not a
// hyphen.
func (p *humanIDNormalizer) truncateHyphens() {
	if p.prevRune != '-' {
		return
	}

	// Hyphens are single-byte, so the number of hyphens is also the number of
	// bytes to drop.
	drop := 1
	if p.prevPrevRune == '-' {
		drop = 2
		p.prevPrevRune = utf8.RuneError
	}

	p.buf.Truncate(p.buf.Len() - drop)
	p.prevRune = utf8.RuneError
}
// nextInvalid handles r while the normalizer is in the invalid state.  Runes
// that are not valid outer hostname runes are skipped.  The first valid one
// returns the normalizer to the valid state and is written, preceded by a
// separating hyphen unless the previous rune already is a hyphen.
func (p *humanIDNormalizer) nextInvalid(r rune) {
	if netutil.IsValidHostOuterRune(r) {
		if p.prevRune != '-' {
			p.write('-')
		}

		p.write(r)
		p.state = humanIDNormStateValid
	}
}
// write appends r to the buffer and shifts the previous-rune history, so that
// prevRune becomes r and prevPrevRune becomes the old prevRune.
func (p *humanIDNormalizer) write(r rune) {
	p.prevPrevRune, p.prevRune = p.prevRune, r

	// The results of WriteRune are deliberately discarded.
	_, _ = p.buf.WriteRune(r)
}
// result returns the normalized string: the buffer contents cut to at most
// MaxHumanIDLen bytes, with any trailing hyphens removed after the cut.
func (p *humanIDNormalizer) result() (s string) {
	data := p.buf.Bytes()
	if len(data) > MaxHumanIDLen {
		data = data[:MaxHumanIDLen]
	}

	// Trim after cutting, since the cut itself may leave a hyphen at the end.
	return string(bytes.TrimRight(data, "-"))
}