From ae6ee1df1994c7d5b54687791eebefb68929fc19 Mon Sep 17 00:00:00 2001 From: "diana.strebkova@t-systems.com" Date: Mon, 27 Jan 2025 21:36:35 +0100 Subject: [PATCH] Added cleanup method for files in Maven snapshot versions --- custom/conf/app.example.ini | 3 + models/packages/package_file.go | 59 ++++++++++++++ models/packages/package_version.go | 11 ++- modules/packages/maven/metadata.go | 39 ++++++++++ modules/setting/packages.go | 10 ++- services/packages/cleanup/cleanup.go | 7 +- services/packages/maven/cleanup.go | 112 +++++++++++++++++++++++++++ 7 files changed, 233 insertions(+), 8 deletions(-) create mode 100644 services/packages/maven/cleanup.go diff --git a/custom/conf/app.example.ini b/custom/conf/app.example.ini index 899209874f..4792e816d7 100644 --- a/custom/conf/app.example.ini +++ b/custom/conf/app.example.ini @@ -2612,6 +2612,9 @@ LEVEL = Info ;LIMIT_SIZE_HELM = -1 ;; Maximum size of a Maven upload (`-1` means no limits, format `1000`, `1 MB`, `1 GiB`) ;LIMIT_SIZE_MAVEN = -1 +;; Specifies the number of most recent Maven snapshot builds to retain. `-1` retains all builds, while `1` retains only the latest build. Value should be -1 or positive. 
+;; The cleanup of expired packages/data then also removes the files of older builds from all Maven snapshot versions.
+;RETAIN_MAVEN_SNAPSHOT_BUILDS = -1
 ;; Maximum size of a npm upload (`-1` means no limits, format `1000`, `1 MB`, `1 GiB`)
 ;LIMIT_SIZE_NPM = -1
 ;; Maximum size of a NuGet upload (`-1` means no limits, format `1000`, `1 MB`, `1 GiB`)
diff --git a/models/packages/package_file.go b/models/packages/package_file.go
index 270cb32fdf..ecda71385c 100644
--- a/models/packages/package_file.go
+++ b/models/packages/package_file.go
@@ -5,11 +5,14 @@ package packages
 
 import (
 	"context"
+	"errors"
+	"fmt"
 	"strconv"
 	"strings"
 	"time"
 
 	"code.gitea.io/gitea/models/db"
+	"code.gitea.io/gitea/modules/log"
 	"code.gitea.io/gitea/modules/timeutil"
 	"code.gitea.io/gitea/modules/util"
 
@@ -226,6 +229,82 @@ func HasFiles(ctx context.Context, opts *PackageFileSearchOptions) (bool, error)
 	return db.Exist[PackageFile](ctx, opts.toConds())
 }
 
+// errMavenMetadataFile marks maven-metadata.xml files (and their checksum files),
+// which carry no build number in their name.
+var errMavenMetadataFile = errors.New("metadata file")
+
+// mavenSnapshotTimestampLayout is the time layout of the timestamp segment in the
+// file name of a timestamped snapshot build, e.g. "20250127.203635".
+const mavenSnapshotTimestampLayout = "20060102.150405"
+
+// GetFilesByBuildNumber retrieves all files of a package version whose Maven snapshot
+// build number is <= maxBuildNumber.
+//
+// Metadata files are never returned. Files whose name carries no recognizable build
+// number are logged and skipped. A negative maxBuildNumber is rejected with an error.
+func GetFilesByBuildNumber(ctx context.Context, versionID int64, maxBuildNumber int) ([]*PackageFile, error) {
+	if maxBuildNumber < 0 {
+		return nil, errors.New("maxBuildNumber must be a non-negative integer")
+	}
+
+	files, err := GetFilesByVersionID(ctx, versionID)
+	if err != nil {
+		return nil, fmt.Errorf("failed to retrieve files: %w", err)
+	}
+
+	var filteredFiles []*PackageFile
+	for _, file := range files {
+		buildNumber, err := extractBuildNumberFromFileName(file.Name)
+		if errors.Is(err, errMavenMetadataFile) {
+			continue
+		}
+		if err != nil {
+			log.Warn("Failed to extract build number from file name '%s': %v", file.Name, err)
+			continue
+		}
+
+		if buildNumber <= maxBuildNumber {
+			filteredFiles = append(filteredFiles, file)
+		}
+	}
+
+	log.Info("Filtered %d files out of %d total files for version ID %d with maxBuildNumber %d", len(filteredFiles), len(files), versionID, maxBuildNumber)
+	return filteredFiles, nil
+}
+
+// extractBuildNumberFromFileName extracts the build number from the name of a
+// timestamped Maven snapshot file:
+//
+//	<artifactId>-<version>-<yyyyMMdd.HHmmss>-<buildNumber>[-<classifier>].<extension>
+//
+// The build number is the numeric segment directly following the timestamp, so files
+// with a classifier (e.g. "-sources.jar") are handled too. It returns
+// errMavenMetadataFile for maven-metadata.xml files.
+func extractBuildNumberFromFileName(filename string) (int, error) {
+	// Skip metadata files
+	if strings.Contains(filename, "maven-metadata.xml") {
+		return 0, errMavenMetadataFile
+	}
+
+	parts := strings.Split(filename, "-")
+	// Search from the end so a timestamp-like artifactId cannot shadow the real timestamp.
+	for i := len(parts) - 1; i > 0; i-- {
+		if _, err := time.Parse(mavenSnapshotTimestampLayout, parts[i-1]); err != nil {
+			continue
+		}
+
+		// The segment is "<buildNumber>" or "<buildNumber>.<extension>".
+		buildNumberStr, _, _ := strings.Cut(parts[i], ".")
+		buildNumber, err := strconv.Atoi(buildNumberStr)
+		if err != nil {
+			return 0, fmt.Errorf("failed to convert build number to integer: '%s'", buildNumberStr)
+		}
+		return buildNumber, nil
+	}
+
+	return 0, fmt.Errorf("invalid file name format: '%s'", filename)
+}
+
 // CalculateFileSize sums up all blob sizes matching the search options.
 // It does NOT respect the deduplication of blobs.
func CalculateFileSize(ctx context.Context, opts *PackageFileSearchOptions) (int64, error) {
diff --git a/models/packages/package_version.go b/models/packages/package_version.go
index 278e8e3a86..b3248e3d10 100644
--- a/models/packages/package_version.go
+++ b/models/packages/package_version.go
@@ -120,11 +120,13 @@ func getVersionByNameAndVersion(ctx context.Context, ownerID int64, packageType
 
-// GetVersionsByPackageType gets all versions of a specific type
+// GetVersionsByPackageType gets all non-internal versions of a specific type.
+// NOTE(review): the Maven snapshot cleanup passes ownerID 0 to query across all owners;
+// this relies on SearchVersions not filtering on a zero OwnerID — confirm.
 func GetVersionsByPackageType(ctx context.Context, ownerID int64, packageType Type) ([]*PackageVersion, error) {
 	pvs, _, err := SearchVersions(ctx, &PackageSearchOptions{
 		OwnerID:    ownerID,
 		Type:       packageType,
 		IsInternal: optional.Some(false),
 	})
 	return pvs, err
 }
 
diff --git a/modules/packages/maven/metadata.go b/modules/packages/maven/metadata.go
index a61a62c086..d903d4b394 100644
--- a/modules/packages/maven/metadata.go
+++ b/modules/packages/maven/metadata.go
@@ -5,6 +5,7 @@ package maven
 
 import (
 	"encoding/xml"
+	"errors"
 	"io"
 
 	"code.gitea.io/gitea/modules/util"
@@ -61,6 +62,27 @@ type pomStruct struct {
 	} `xml:"dependencies>dependency"`
 }
 
+// MavenMetadata is the subset of a snapshot version's maven-metadata.xml that is decoded.
+type MavenMetadata struct {
+	XMLName    xml.Name `xml:"metadata"`
+	GroupID    string   `xml:"groupId"`
+	ArtifactID string   `xml:"artifactId"`
+	Version    string   `xml:"version"`
+	Versioning struct {
+		LastUpdated string `xml:"lastUpdated"`
+		Snapshot    struct {
+			Timestamp   string `xml:"timestamp"`
+			BuildNumber string `xml:"buildNumber"`
+		} `xml:"snapshot"`
+		SnapshotVersions []struct {
+			Classifier string `xml:"classifier"`
+			Extension  string `xml:"extension"`
+			Value      string `xml:"value"`
+			Updated    string `xml:"updated"`
+		} `xml:"snapshotVersions>snapshotVersion"`
+	} `xml:"versioning"`
+}
+
 // ParsePackageMetaData parses the metadata of a pom file
 func ParsePackageMetaData(r io.Reader) (*Metadata, error) {
 	var pom pomStruct
@@
-109,3 +131,23 @@ func ParsePackageMetaData(r io.Reader) (*Metadata, error) {
 		Dependencies: dependencies,
 	}, nil
 }
+
+// ParseMavenMetaData parses a maven-metadata.xml document and returns the snapshot
+// build number (versioning>snapshot>buildNumber) exactly as it appears in the document.
+// It returns an error if the XML cannot be decoded or if it contains no build number.
+func ParseMavenMetaData(r io.Reader) (string, error) {
+	var metadata MavenMetadata
+
+	dec := xml.NewDecoder(r)
+	// CharsetReader lets the decoder read documents declared in a non-UTF-8 encoding.
+	dec.CharsetReader = charset.NewReaderLabel
+	if err := dec.Decode(&metadata); err != nil {
+		return "", err
+	}
+
+	if metadata.Versioning.Snapshot.BuildNumber == "" {
+		return "", errors.New("no build number in snapshot metadata found")
+	}
+
+	return metadata.Versioning.Snapshot.BuildNumber, nil
+}
diff --git a/modules/setting/packages.go b/modules/setting/packages.go
index 3f618cfd64..df83a3f07a 100644
--- a/modules/setting/packages.go
+++ b/modules/setting/packages.go
@@ -44,10 +44,12 @@ var (
 		LimitSizeSwift       int64
 		LimitSizeVagrant     int64
 
-		DefaultRPMSignEnabled bool
+		DefaultRPMSignEnabled     bool
+		RetainMavenSnapshotBuilds int
 	}{
-		Enabled:              true,
-		LimitTotalOwnerCount: -1,
+		Enabled:                   true,
+		LimitTotalOwnerCount:      -1,
+		RetainMavenSnapshotBuilds: -1,
 	}
 )
 
@@ -101,7 +103,8 @@ func loadPackagesFrom(rootCfg ConfigProvider) (err error) {
 	Packages.LimitSizeRubyGems = mustBytes(sec, "LIMIT_SIZE_RUBYGEMS")
 	Packages.LimitSizeSwift = mustBytes(sec, "LIMIT_SIZE_SWIFT")
 	Packages.LimitSizeVagrant = mustBytes(sec, "LIMIT_SIZE_VAGRANT")
 	Packages.DefaultRPMSignEnabled = sec.Key("DEFAULT_RPM_SIGN_ENABLED").MustBool(false)
+	Packages.RetainMavenSnapshotBuilds = sec.Key("RETAIN_MAVEN_SNAPSHOT_BUILDS").MustInt(Packages.RetainMavenSnapshotBuilds)
 	return nil
 }
 
diff --git a/services/packages/cleanup/cleanup.go b/services/packages/cleanup/cleanup.go
index b7ba2b6ac4..7d70afbd80 100644
--- a/services/packages/cleanup/cleanup.go
+++ b/services/packages/cleanup/cleanup.go
@@ -1,7 +1,7 @@
 // Copyright 2022 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
 
-package container
+package cleanup
 
 import (
 	"context"
@@ -20,6 +20,7 @@ import (
 	cargo_service "code.gitea.io/gitea/services/packages/cargo"
 	container_service "code.gitea.io/gitea/services/packages/container"
 	debian_service "code.gitea.io/gitea/services/packages/debian"
+	maven_service "code.gitea.io/gitea/services/packages/maven"
 	rpm_service "code.gitea.io/gitea/services/packages/rpm"
 )
 
@@ -166,6 +167,10 @@ func CleanupExpiredData(outerCtx context.Context, olderThan time.Duration) error
 		return err
 	}
 
+	if err := maven_service.CleanupSnapshotVersions(ctx); err != nil {
+		return err
+	}
+
 	ps, err := packages_model.FindUnreferencedPackages(ctx)
 	if err != nil {
 		return err
diff --git a/services/packages/maven/cleanup.go b/services/packages/maven/cleanup.go
new file mode 100644
index 0000000000..658dd73cab
--- /dev/null
+++ b/services/packages/maven/cleanup.go
@@ -0,0 +1,132 @@
+// Copyright 2025 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package maven
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"strconv"
+	"strings"
+
+	"code.gitea.io/gitea/models/packages"
+	"code.gitea.io/gitea/modules/log"
+	"code.gitea.io/gitea/modules/packages/maven"
+	"code.gitea.io/gitea/modules/setting"
+	packages_service "code.gitea.io/gitea/services/packages"
+)
+
+// CleanupSnapshotVersions removes outdated files of SNAPSHOT versions for all Maven packages.
+//
+// The number of builds to keep per version is read from
+// setting.Packages.RetainMavenSnapshotBuilds: -1 disables the cleanup and any other
+// value below 1 is rejected with an error. A failure for one version does not stop
+// the cleanup of the remaining versions; all such failures are returned joined.
+func CleanupSnapshotVersions(ctx context.Context) error {
+	retainBuilds := setting.Packages.RetainMavenSnapshotBuilds
+	log.Debug("Starting CleanupSnapshotVersions with retainBuilds: %d", retainBuilds)
+
+	if retainBuilds == -1 {
+		log.Debug("CleanupSnapshotVersions skipped because retainBuilds is set to -1")
+		return nil
+	}
+
+	if retainBuilds < 1 {
+		return fmt.Errorf("forbidden value for retainBuilds: %d. Minimum 1 build should be retained", retainBuilds)
+	}
+
+	versions, err := packages.GetVersionsByPackageType(ctx, 0, packages.TypeMaven)
+	if err != nil {
+		return fmt.Errorf("failed to retrieve Maven package versions: %w", err)
+	}
+
+	var errs []error
+	for _, version := range versions {
+		// Stop early on cancellation instead of failing once per remaining version.
+		if err := ctx.Err(); err != nil {
+			return errors.Join(append(errs, err)...)
+		}
+
+		if !isSnapshotVersion(version.Version) {
+			continue
+		}
+
+		if err := cleanSnapshotFiles(ctx, version.ID, retainBuilds); err != nil {
+			// Keep going: one broken version must not block the cleanup of all others.
+			errs = append(errs, fmt.Errorf("version '%s' (ID: %d): %w", version.Version, version.ID, err))
+		}
+	}
+
+	log.Debug("Completed CleanupSnapshotVersions")
+	return errors.Join(errs...)
+}
+
+// isSnapshotVersion reports whether version is a Maven snapshot version, i.e. ends with "-SNAPSHOT".
+func isSnapshotVersion(version string) bool {
+	return strings.HasSuffix(version, "-SNAPSHOT")
+}
+
+// cleanSnapshotFiles deletes the files of one snapshot version that belong to builds
+// older than the newest retainBuilds builds. The newest build number is taken from
+// the version's maven-metadata.xml.
+func cleanSnapshotFiles(ctx context.Context, versionID int64, retainBuilds int) error {
+	metadataFile, err := packages.GetFileForVersionByName(ctx, versionID, "maven-metadata.xml", packages.EmptyFileKey)
+	if err != nil {
+		// NOTE(review): assumes a missing file is reported as ErrPackageFileNotExist — confirm.
+		if errors.Is(err, packages.ErrPackageFileNotExist) {
+			log.Debug("No maven-metadata.xml for version ID %d, skipping snapshot cleanup", versionID)
+			return nil
+		}
+		return fmt.Errorf("failed to retrieve Maven metadata file for version ID %d: %w", versionID, err)
+	}
+
+	maxBuildNumber, err := extractMaxBuildNumberFromMetadata(ctx, metadataFile)
+	if err != nil {
+		return fmt.Errorf("failed to extract max build number from maven-metadata.xml for version ID %d: %w", versionID, err)
+	}
+
+	// Files of builds up to and including this number are removed, which keeps the
+	// newest retainBuilds builds.
+	thresholdBuildNumber := maxBuildNumber - retainBuilds
+	if thresholdBuildNumber <= 0 {
+		log.Debug("No files to clean up, as the threshold build number is less than or equal to zero for versionID %d", versionID)
+		return nil
+	}
+
+	filesToRemove, err := packages.GetFilesByBuildNumber(ctx, versionID, thresholdBuildNumber)
+	if err != nil {
+		return fmt.Errorf("failed to retrieve files for version ID %d: %w", versionID, err)
+	}
+
+	for _, file := range filesToRemove {
+		log.Debug("Removing file '%s' below threshold %d", file.Name, thresholdBuildNumber)
+		if err := packages_service.DeletePackageFile(ctx, file); err != nil {
+			return fmt.Errorf("failed to delete file '%s': %w", file.Name, err)
+		}
+	}
+
+	return nil
+}
+
+// extractMaxBuildNumberFromMetadata reads the given maven-metadata.xml package file
+// and returns the snapshot build number recorded in it.
+func extractMaxBuildNumberFromMetadata(ctx context.Context, metadataFile *packages.PackageFile) (int, error) {
+	content, _, _, err := packages_service.GetPackageFileStream(ctx, metadataFile)
+	if err != nil {
+		return 0, fmt.Errorf("failed to get package file stream: %w", err)
+	}
+	defer content.Close()
+
+	buildNumberStr, err := maven.ParseMavenMetaData(content)
+	if err != nil {
+		return 0, fmt.Errorf("failed to parse maven-metadata.xml: %w", err)
+	}
+
+	buildNumber, err := strconv.Atoi(strings.TrimSpace(buildNumberStr))
+	if err != nil {
+		return 0, fmt.Errorf("invalid build number format: %w", err)
+	}
+
+	return buildNumber, nil
+}