0
Fork 0
mirror of https://github.com/project-zot/zot.git synced 2025-01-06 22:40:28 -05:00
zot/.github/workflows/nightly.yaml
Vishwas R 5ae7a028d9
feat(cluster): Add support for request proxying for scale out (#2385)
* feat(cluster): initial commit for scale-out cluster

Signed-off-by: Ramkumar Chinchani <rchincha@cisco.com>

* feat(cluster): support shared storage scale out

This change introduces support for shared storage backed
zot cluster scale out.

New feature
Multiple stateless zot instances can run using the same shared
storage backend where each instance looks at a specific set
of repositories based on a siphash of the repository name to improve
scale as the load is distributed across multiple instances.
For a given config, there will only be one instance that can perform
dist-spec read/write on a given repository.

What's changed?
- introduced a transparent request proxy for dist-spec endpoints based on
siphash of repository name.
- new config for scale out cluster that specifies list of
cluster members.

Signed-off-by: Vishwas Rajashekar <vrajashe@cisco.com>

---------

Signed-off-by: Ramkumar Chinchani <rchincha@cisco.com>
Signed-off-by: Vishwas Rajashekar <vrajashe@cisco.com>
Co-authored-by: Ramkumar Chinchani <rchincha@cisco.com>
2024-05-20 09:05:21 -07:00

278 lines
9.5 KiB
YAML

name: 'Nightly jobs'
on:
schedule:
- cron: '30 1 * * *'
workflow_dispatch:
permissions: read-all
# The following tests are run:
# 1. run zot with local storage and dedupe disabled, push images, restart zot with dedupe enabled
# task scheduler will start a dedupe all blobs process at zot startup and it shouldn't interfere with clients.
# 2. run zot with s3 storage and dynamodb and dedupe enabled, push images, restart zot with dedupe false and no cache
# task scheduler will start a restore all blobs process at zot startup, after it finishes all blobs should be restored to their original state (have content)
# 3. run many, many, many instances of zot with shared storage and metadata front-ended by HAProxy. start a long-running zb run with high concurrency and number of requests
# to achieve a long-running sustained load on the system. The system is expected to perform well without errors and return performance data after the test.
jobs:
dedupe:
name: Dedupe/restore blobs
runs-on: ubuntu-latest-4-cores
steps:
- uses: actions/checkout@v4
- uses: actions/setup-go@v5
with:
go-version: 1.22.x
- name: Install dependencies
run: |
cd $GITHUB_WORKSPACE
go install github.com/swaggo/swag/cmd/swag@v1.16.2
go mod download
sudo apt-get update
sudo apt-get install libgpgme-dev libassuan-dev libbtrfs-dev libdevmapper-dev pkg-config rpm uidmap
# install skopeo
git clone -b v1.12.0 https://github.com/containers/skopeo.git
cd skopeo
make bin/skopeo
sudo cp bin/skopeo /usr/bin
skopeo -v
- name: Log in to GitHub Docker Registry
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ github.token }}
- uses: actions/setup-python@v5
with:
python-version: '3.11'
- name: Install localstack
run: |
pip install --upgrade pyopenssl
pip install localstack==3.3.0 awscli-local[ver1] # install LocalStack cli and awslocal
docker pull ghcr.io/project-zot/ci-images/localstack:3.3.0 # Make sure to pull the latest version of the image
localstack start -d # Start LocalStack in the background
echo "Waiting for LocalStack startup..." # Wait 30 seconds for the LocalStack container
localstack wait -t 30 # to become ready before timing out
echo "Startup complete"
- name: Run blackbox nightly dedupe tests
run: |
# test restoring s3 blobs after cache is deleted
# test deduping filesystem blobs after switching dedupe to enable
make run-blackbox-dedupe-nightly
env:
AWS_ACCESS_KEY_ID: fake
AWS_SECRET_ACCESS_KEY: fake
- uses: ./.github/actions/teardown-localstack
sync:
name: Sync harness
runs-on: ubuntu-latest-4-cores
steps:
- name: Check out source code
uses: actions/checkout@v4
- uses: actions/setup-go@v5
with:
go-version: 1.22.x
- name: Install dependencies
run: |
cd $GITHUB_WORKSPACE
go install github.com/swaggo/swag/cmd/swag@v1.16.2
go mod download
- name: Run sync harness
run: |
make run-blackbox-sync-nightly
gc-referrers-stress-s3:
name: GC(with referrers) on S3(localstack) with short interval
runs-on: ubuntu-latest-16-cores
steps:
- uses: actions/checkout@v4
- uses: actions/setup-go@v5
with:
cache: false
go-version: 1.22.x
- uses: ./.github/actions/setup-localstack
- name: Run zb
timeout-minutes: 240
id: bench
run: |
make binary
make bench
./bin/zot-linux-amd64 serve test/gc-stress/config-gc-referrers-bench-s3-localstack.json &
sleep 10
bin/zb-linux-amd64 -c 10 -n 100 -o ci-cd http://localhost:8080 --skip-cleanup
killall -r zot-*
# clean zot storage
sudo rm -rf /tmp/zot
env:
AWS_ACCESS_KEY_ID: fake
AWS_SECRET_ACCESS_KEY: fake
continue-on-error: true
- name: Check on failures
if: steps.bench.outcome != 'success'
run: |
cat /tmp/gc-referrers-bench-s3.log
exit 1
- uses: ./.github/actions/teardown-localstack
gc-stress-s3:
name: GC(without referrers) on S3(localstack) with short interval
runs-on: ubuntu-latest-16-cores
steps:
- uses: actions/checkout@v4
- uses: actions/setup-go@v5
with:
cache: false
go-version: 1.22.x
- uses: ./.github/actions/setup-localstack
- name: Run zb
timeout-minutes: 240
id: bench
run: |
make binary
make bench
./bin/zot-linux-amd64 serve test/gc-stress/config-gc-bench-s3-localstack.json &
sleep 10
bin/zb-linux-amd64 -c 10 -n 100 -o ci-cd http://localhost:8080 --skip-cleanup
killall -r zot-*
# clean zot storage
sudo rm -rf /tmp/zot
env:
AWS_ACCESS_KEY_ID: fake
AWS_SECRET_ACCESS_KEY: fake
continue-on-error: true
- name: Check on failures
if: steps.bench.outcome != 'success'
run: |
cat /tmp/gc-bench-s3.log
exit 1
- uses: ./.github/actions/teardown-localstack
docker-image:
name: Build docker image (for users still using Docker environments)
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Check out source code
uses: actions/checkout@v4
- uses: ./.github/actions/clean-runner
- name: Build image
run: |
make docker-image
kind-setup:
name: Prometheus setup
runs-on: ubuntu-latest-8-cores
steps:
- uses: actions/checkout@v4
- uses: actions/setup-go@v5
with:
go-version: 1.22.x
- name: Install dependencies
run: |
cd $GITHUB_WORKSPACE
go mod download
sudo apt-get update
sudo apt-get install libgpgme-dev libassuan-dev libbtrfs-dev libdevmapper-dev pkg-config rpm uidmap
# install skopeo
git clone -b v1.12.0 https://github.com/containers/skopeo.git
cd skopeo
make bin/skopeo
sudo cp bin/skopeo /usr/bin
skopeo -v
- name: Log in to GitHub Docker Registry
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ github.token }}
- name: Run tests
run: |
./examples/kind/kind-ci.sh
cloud-scale-out:
name: s3+dynamodb scale-out
runs-on: ubuntu-latest-16-cores
steps:
- uses: actions/checkout@v4
- uses: actions/setup-go@v5
with:
cache: false
go-version: 1.22.x
- name: Install dependencies
run: |
cd $GITHUB_WORKSPACE
go install github.com/swaggo/swag/cmd/swag@v1.16.2
go mod download
sudo apt-get update
sudo apt-get install libgpgme-dev libassuan-dev libbtrfs-dev libdevmapper-dev pkg-config rpm uidmap haproxy jq
# install skopeo
git clone -b v1.12.0 https://github.com/containers/skopeo.git
cd skopeo
make bin/skopeo
sudo cp bin/skopeo /usr/bin
skopeo -v
cd $GITHUB_WORKSPACE
- name: Log in to GitHub Docker Registry
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ github.token }}
- uses: actions/setup-python@v5
with:
python-version: '3.11'
- name: Install localstack
run: |
pip install --upgrade pyopenssl
pip install localstack==3.3.0 awscli-local[ver1] # install LocalStack cli and awslocal
docker pull ghcr.io/project-zot/ci-images/localstack:3.3.0 # Make sure to pull a working version of the image
localstack start -d # Start LocalStack in the background
echo "Waiting for LocalStack startup..." # Wait 30 seconds for the LocalStack container
localstack wait -t 30 # to become ready before timing out
echo "Startup complete"
- name: Run cloud scale-out high scale performance tests
id: scale
run: |
make run-cloud-scale-out-high-scale-tests
env:
AWS_ACCESS_KEY_ID: fake
AWS_SECRET_ACCESS_KEY: fake
continue-on-error: true
- name: print service logs
run: |
sudo dmesg
cat /tmp/zot-logs/*.log
- name: multi-hop detection
id: multihop
run: |
if cat /tmp/zot-logs/*.log | grep 'cannot proxy an already proxied request'; then
echo "detected multi-hop"
exit 1
else
exit 0
fi
continue-on-error: true
- name: clean up logs
run: |
rm -r /tmp/zot-logs
- name: fail job if error
if: ${{ steps.scale.outcome != 'success' || steps.multihop.outcome != 'success' }}
run: |
exit 1
- name: Upload zb test results zip as build artifact
if: steps.scale.outcome == 'success'
uses: actions/upload-artifact@v4
with:
name: zb-cloud-scale-out-perf-results-${{ github.sha }}
path: ./zb-results/
- uses: ./.github/actions/teardown-localstack