mirror of
https://github.com/project-zot/zot.git
synced 2025-01-06 22:40:28 -05:00
5ae7a028d9
* feat(cluster): initial commit for scale-out cluster Signed-off-by: Ramkumar Chinchani <rchincha@cisco.com> * feat(cluster): support shared storage scale out This change introduces support for shared storage backed zot cluster scale out. New feature Multiple stateless zot instances can run using the same shared storage backend where each instance looks at a specific set of repositories based on a siphash of the repository name to improve scale as the load is distributed across multiple instances. For a given config, there will only be one instance that can perform dist-spec read/write on a given repository. What's changed? - introduced a transparent request proxy for dist-spec endpoints based on siphash of repository name. - new config for scale out cluster that specifies list of cluster members. Signed-off-by: Vishwas Rajashekar <vrajashe@cisco.com> --------- Signed-off-by: Ramkumar Chinchani <rchincha@cisco.com> Signed-off-by: Vishwas Rajashekar <vrajashe@cisco.com> Co-authored-by: Ramkumar Chinchani <rchincha@cisco.com>
278 lines
9.5 KiB
YAML
278 lines
9.5 KiB
YAML
# Workflow identity, triggers and default token permissions.
name: "Nightly jobs"

on:
  # Allow manual runs in addition to the nightly schedule.
  workflow_dispatch:
  # Every day at 01:30 (GitHub evaluates cron schedules in UTC).
  schedule:
    - cron: "30 1 * * *"

# The workflow token gets read-only access to all scopes.
permissions: read-all
|
|
|
|
# The following tests are run:
# 1. Run zot with local storage and dedupe disabled, push images, then restart zot with dedupe enabled.
#    The task scheduler starts a dedupe-all-blobs process at zot startup; it must not interfere with clients.
# 2. Run zot with S3 storage, DynamoDB and dedupe enabled, push images, then restart zot with dedupe disabled and no cache.
#    The task scheduler starts a restore-all-blobs process at zot startup; once it finishes, all blobs should be restored to their original state (have content).
# 3. Run many instances of zot with shared storage and metadata, front-ended by HAProxy. Start a long-running zb run with high concurrency and a large number of requests
#    to put a sustained load on the system. The system is expected to perform well without errors and to return performance data after the test.
|
|
jobs:
|
|
dedupe:
  # Nightly blackbox check of blob dedupe/restore: builds skopeo, starts
  # LocalStack, then runs `make run-blackbox-dedupe-nightly`.
  name: Dedupe/restore blobs
  runs-on: ubuntu-latest-4-cores
  steps:
    - uses: actions/checkout@v4
    - uses: actions/setup-go@v5
      with:
        go-version: 1.22.x
    - name: Install dependencies
      run: |
        cd $GITHUB_WORKSPACE
        go install github.com/swaggo/swag/cmd/swag@v1.16.2
        go mod download
        sudo apt-get update
        # -y keeps the install non-interactive without relying on the runner's apt configuration
        sudo apt-get install -y libgpgme-dev libassuan-dev libbtrfs-dev libdevmapper-dev pkg-config rpm uidmap
        # install skopeo
        git clone -b v1.12.0 https://github.com/containers/skopeo.git
        cd skopeo
        make bin/skopeo
        sudo cp bin/skopeo /usr/bin
        skopeo -v
    - name: Log in to GitHub Docker Registry
      uses: docker/login-action@v3
      with:
        registry: ghcr.io
        username: ${{ github.actor }}
        password: ${{ github.token }}
    - uses: actions/setup-python@v5
      with:
        python-version: '3.11'
    - name: Install localstack
      run: |
        pip install --upgrade pyopenssl
        pip install localstack==3.3.0 awscli-local[ver1]  # install LocalStack cli and awslocal
        docker pull ghcr.io/project-zot/ci-images/localstack:3.3.0  # Make sure to pull a working (pinned) version of the image
        localstack start -d                       # Start LocalStack in the background

        echo "Waiting for LocalStack startup..."  # Wait 30 seconds for the LocalStack container
        localstack wait -t 30                     # to become ready before timing out
        echo "Startup complete"
    - name: Run blackbox nightly dedupe tests
      run: |
        # test restoring s3 blobs after cache is deleted
        # test deduping filesystem blobs after switching dedupe to enable
        make run-blackbox-dedupe-nightly
      env:
        AWS_ACCESS_KEY_ID: fake
        AWS_SECRET_ACCESS_KEY: fake
    # Run the teardown even when an earlier step failed, so LocalStack is not left behind.
    - uses: ./.github/actions/teardown-localstack
      if: always()
|
|
|
|
sync:
  # Nightly run of the blackbox sync harness (`make run-blackbox-sync-nightly`).
  name: "Sync harness"
  runs-on: "ubuntu-latest-4-cores"
  steps:
    - uses: actions/checkout@v4
      name: Check out source code
    - uses: actions/setup-go@v5
      with:
        go-version: "1.22.x"
    # Fetch the swag CLI and the module dependencies before running the harness.
    - name: Install dependencies
      run: |
        cd $GITHUB_WORKSPACE
        go install github.com/swaggo/swag/cmd/swag@v1.16.2
        go mod download
    - name: Run sync harness
      run: make run-blackbox-sync-nightly
|
|
|
|
gc-referrers-stress-s3:
  # Runs the zb benchmark against a zot server started with the
  # gc-referrers S3/LocalStack config; the job fails if the benchmark step fails.
  name: GC(with referrers) on S3(localstack) with short interval
  runs-on: ubuntu-latest-16-cores
  steps:
    - uses: actions/checkout@v4
    - uses: actions/setup-go@v5
      with:
        cache: false
        go-version: 1.22.x
    - uses: ./.github/actions/setup-localstack

    - name: Run zb
      timeout-minutes: 240
      id: bench
      run: |
        make binary
        make bench
        ./bin/zot-linux-amd64 serve test/gc-stress/config-gc-referrers-bench-s3-localstack.json &
        sleep 10
        bin/zb-linux-amd64 -c 10 -n 100 -o ci-cd http://localhost:8080 --skip-cleanup

        # quote the regex so the shell never glob-expands it against files in the workspace
        killall -r 'zot-*'

        # clean zot storage
        sudo rm -rf /tmp/zot
      env:
        AWS_ACCESS_KEY_ID: fake
        AWS_SECRET_ACCESS_KEY: fake
      # Do not stop the job here; the next step dumps the log and fails the job explicitly.
      continue-on-error: true

    - name: Check on failures
      if: steps.bench.outcome != 'success'
      run: |
        cat /tmp/gc-referrers-bench-s3.log
        exit 1
    # Must run even after the explicit `exit 1` above, otherwise teardown is skipped on every failure.
    - uses: ./.github/actions/teardown-localstack
      if: always()
|
|
|
|
gc-stress-s3:
  # Runs the zb benchmark against a zot server started with the
  # gc (no referrers) S3/LocalStack config; the job fails if the benchmark step fails.
  name: GC(without referrers) on S3(localstack) with short interval
  runs-on: ubuntu-latest-16-cores
  steps:
    - uses: actions/checkout@v4
    - uses: actions/setup-go@v5
      with:
        cache: false
        go-version: 1.22.x
    - uses: ./.github/actions/setup-localstack

    - name: Run zb
      timeout-minutes: 240
      id: bench
      run: |
        make binary
        make bench
        ./bin/zot-linux-amd64 serve test/gc-stress/config-gc-bench-s3-localstack.json &
        sleep 10
        bin/zb-linux-amd64 -c 10 -n 100 -o ci-cd http://localhost:8080 --skip-cleanup

        # quote the regex so the shell never glob-expands it against files in the workspace
        killall -r 'zot-*'

        # clean zot storage
        sudo rm -rf /tmp/zot
      env:
        AWS_ACCESS_KEY_ID: fake
        AWS_SECRET_ACCESS_KEY: fake
      # Do not stop the job here; the next step dumps the log and fails the job explicitly.
      continue-on-error: true

    - name: Check on failures
      if: steps.bench.outcome != 'success'
      run: |
        cat /tmp/gc-bench-s3.log
        exit 1
    # Must run even after the explicit `exit 1` above, otherwise teardown is skipped on every failure.
    - uses: ./.github/actions/teardown-localstack
      if: always()
|
|
|
|
docker-image:
  # Builds the Docker image via `make docker-image` after cleaning the runner.
  name: Build docker image (for users still using Docker environments)
  runs-on: ubuntu-latest
  steps:
    # One checkout is sufficient; the repository was previously checked out twice in a row.
    - name: Check out source code
      uses: actions/checkout@v4
    - uses: ./.github/actions/clean-runner
    - name: Build image
      run: |
        make docker-image
|
|
|
|
kind-setup:
  # Builds skopeo, logs in to ghcr.io and runs the kind CI script
  # (`./examples/kind/kind-ci.sh`).
  name: Prometheus setup
  runs-on: ubuntu-latest-8-cores
  steps:
    - uses: actions/checkout@v4
    - uses: actions/setup-go@v5
      with:
        go-version: 1.22.x
    - name: Install dependencies
      run: |
        cd $GITHUB_WORKSPACE
        go mod download
        sudo apt-get update
        # -y keeps the install non-interactive without relying on the runner's apt configuration
        sudo apt-get install -y libgpgme-dev libassuan-dev libbtrfs-dev libdevmapper-dev pkg-config rpm uidmap
        # install skopeo
        git clone -b v1.12.0 https://github.com/containers/skopeo.git
        cd skopeo
        make bin/skopeo
        sudo cp bin/skopeo /usr/bin
        skopeo -v
    - name: Log in to GitHub Docker Registry
      uses: docker/login-action@v3
      with:
        registry: ghcr.io
        username: ${{ github.actor }}
        password: ${{ github.token }}
    - name: Run tests
      run: |
        ./examples/kind/kind-ci.sh
|
|
|
|
cloud-scale-out:
  # High-scale scale-out test (`make run-cloud-scale-out-high-scale-tests`)
  # with LocalStack started beforehand. The scale and multi-hop steps are
  # allowed to fail so that logs can be printed and cleanup can run; the final
  # step then fails the job if either of them did not succeed.
  name: s3+dynamodb scale-out
  runs-on: ubuntu-latest-16-cores
  steps:
    - uses: actions/checkout@v4
    - uses: actions/setup-go@v5
      with:
        cache: false
        go-version: 1.22.x
    - name: Install dependencies
      run: |
        cd $GITHUB_WORKSPACE
        go install github.com/swaggo/swag/cmd/swag@v1.16.2
        go mod download
        sudo apt-get update
        # -y keeps the install non-interactive without relying on the runner's apt configuration
        sudo apt-get install -y libgpgme-dev libassuan-dev libbtrfs-dev libdevmapper-dev pkg-config rpm uidmap haproxy jq
        # install skopeo
        git clone -b v1.12.0 https://github.com/containers/skopeo.git
        cd skopeo
        make bin/skopeo
        sudo cp bin/skopeo /usr/bin
        skopeo -v
        cd $GITHUB_WORKSPACE
    - name: Log in to GitHub Docker Registry
      uses: docker/login-action@v3
      with:
        registry: ghcr.io
        username: ${{ github.actor }}
        password: ${{ github.token }}
    - uses: actions/setup-python@v5
      with:
        python-version: '3.11'
    - name: Install localstack
      run: |
        pip install --upgrade pyopenssl
        pip install localstack==3.3.0 awscli-local[ver1]  # install LocalStack cli and awslocal
        docker pull ghcr.io/project-zot/ci-images/localstack:3.3.0  # Make sure to pull a working version of the image
        localstack start -d                       # Start LocalStack in the background

        echo "Waiting for LocalStack startup..."  # Wait 30 seconds for the LocalStack container
        localstack wait -t 30                     # to become ready before timing out
        echo "Startup complete"
    - name: Run cloud scale-out high scale performance tests
      id: scale
      run: |
        make run-cloud-scale-out-high-scale-tests
      env:
        AWS_ACCESS_KEY_ID: fake
        AWS_SECRET_ACCESS_KEY: fake
      # Outcome is evaluated by the final "fail job if error" step.
      continue-on-error: true
    - name: print service logs
      run: |
        sudo dmesg
        cat /tmp/zot-logs/*.log
    - name: multi-hop detection
      id: multihop
      run: |
        if cat /tmp/zot-logs/*.log | grep 'cannot proxy an already proxied request'; then
          echo "detected multi-hop"
          exit 1
        else
          exit 0
        fi
      # Outcome is evaluated by the final "fail job if error" step.
      continue-on-error: true
    - name: clean up logs
      run: |
        rm -r /tmp/zot-logs
    - name: Upload zb test results zip as build artifact
      if: steps.scale.outcome == 'success'
      uses: actions/upload-artifact@v4
      with:
        name: zb-cloud-scale-out-perf-results-${{ github.sha }}
        path: ./zb-results/
    # Tear LocalStack down regardless of how the earlier steps ended.
    - uses: ./.github/actions/teardown-localstack
      if: always()
    # Kept as the last step: failing earlier would skip the artifact upload and
    # the teardown, because later steps do not run after a failed step by default.
    - name: fail job if error
      if: ${{ steps.scale.outcome != 'success' || steps.multihop.outcome != 'success' }}
      run: |
        exit 1
|