skywalking
doc https://github.com/SkyAPM/document-cn-translation-of-skywalking
SkyWalking 极简入门 https://skywalking.apache.org/zh/2020-04-19-skywalking-quick-start/
- 多种监控手段。可以通过语言探针和 service mesh 获得监控数据。
- 多个语言自动探针。包括 Java,.NET Core 和 Node.JS。
- 轻量高效。无需大数据平台,和大量的服务器资源。
- 模块化。UI、存储、集群管理都有多种机制可选。
- 支持告警。
- 优秀的可视化解决方案。
docker
https://skywalking.apache.org/docs/main/v10.0.1/en/setup/backend/backend-docker/
$ docker run --name oap --restart always -d -e SW_STORAGE=elasticsearch -e SW_STORAGE_ES_CLUSTER_NODES=elasticsearch:9200 apache/skywalking-oap-server:9.2.0
/skywalking/config/application.yml
# Storage section of /skywalking/config/application.yml.
# Each ${ENV_VAR:default} placeholder names an environment variable and the
# fallback text after the first colon; the defaults use plain ASCII quotes.
storage:
  selector: ${SW_STORAGE:elasticsearch}
  elasticsearch:
    namespace: ${SW_NAMESPACE:""}
    user: ${SW_ES_USER:""}  # User needs to be set when Http Basic authentication is enabled
    password: ${SW_ES_PASSWORD:""}  # Password to be set when Http Basic authentication is enabled
    clusterNodes: ${SW_STORAGE_ES_CLUSTER_NODES:localhost:443}
    trustStorePath: ${SW_STORAGE_ES_SSL_JKS_PATH:"../es_keystore.jks"}
    trustStorePass: ${SW_STORAGE_ES_SSL_JKS_PASS:""}
    protocol: ${SW_STORAGE_ES_HTTP_PROTOCOL:"https"}
1 | storage: |
$ docker run --name oap-ui --restart always -d -e SW_OAP_ADDRESS=http://oap:12800 apache/skywalking-ui
skywalking-oap.yaml
oap & ui
# Docker Compose stack: SkyWalking OAP server (Elasticsearch storage) and the
# SkyWalking UI, both attached to the user-defined network skywalking-net.
version: "3"

networks:
  skywalking-net:
    driver: bridge

services:
  oap:
    container_name: oap
    image: apache/skywalking-oap-server:10.0.1
    # restart: always
    networks:
      - skywalking-net
    ports:
      - '11800:11800'  # port that collects monitoring data
      - '12800:12800'  # port that accepts front-end requests
      - '11234:1234'   # host port for the Prometheus telemetry port set below
    extra_hosts:
      - 'es.local.org:192.168.122.1'  # add a hostname mapping
    volumes:
      - "/opt/ELK/elasticsearch-7.17.1/es.local.jks:/opt/es.jks"
      # - "/opt/ELK/elasticsearch-7.17.1/es.jks:/opt/es.jks"
      - "/etc/localtime:/etc/localtime:ro"
    # NOTE(review): in list-form entries the double quotes below are not YAML
    # quoting and are presumably passed through as part of the value -
    # confirm that OAP accepts them.
    environment:
      - TZ=Asia/Shanghai
      - SW_STORAGE=elasticsearch
      - SW_NAMESPACE=sw-index
      - SW_STORAGE_ES_CLUSTER_NODES="es.local.org:9123"
      - SW_ES_USER=elastic
      - SW_ES_PASSWORD=cs123456
      - SW_STORAGE_ES_SSL_JKS_PATH="/opt/es.jks"
      - SW_STORAGE_ES_SSL_JKS_PASS=cs123456
      - SW_STORAGE_ES_HTTP_PROTOCOL=https
      - SW_CORE_RECORD_DATA_TTL=3
      - SW_CORE_METRICS_DATA_TTL=7
      # Alternative: switch the storage type to PostgreSQL
      # - SW_STORAGE=postgresql
      # - SW_NAMESPACE=skywalking  # set the namespace as needed
      # - SW_JDBC_URL="jdbc:postgresql://local.org:5433/skywalking"  # PostgreSQL JDBC URL
      # - SW_DATA_SOURCE_USER=postgres  # PostgreSQL user name
      # - SW_DATA_SOURCE_PASSWORD=123456  # PostgreSQL password
      - SW_TELEMETRY=prometheus
      - SW_TELEMETRY_PROMETHEUS_HOST=0.0.0.0
      - SW_TELEMETRY_PROMETHEUS_PORT=1234
      - SW_TELEMETRY_PROMETHEUS_SSL_ENABLED=false

  oap-ui:
    container_name: oap-ui
    image: apache/skywalking-ui:10.0.1
    restart: always
    depends_on:
      - oap
    networks:
      - skywalking-net
    ports:
      - '8086:8080'  # change the port mapping as needed
    volumes:
      - "/etc/localtime:/etc/localtime:ro"
    environment:
      - TZ=Asia/Shanghai
      # - SW_OAP_ADDRESS=http://192.168.122.1:12800
      - SW_OAP_ADDRESS=http://oap:12800
other
示例
agent-as-sidecar
agent-as-sidecar
# Pod that hands the SkyWalking Java agent to an application container through
# a shared emptyDir volume.
apiVersion: v1
kind: Pod
metadata:
  name: agent-as-sidecar
spec:
  restartPolicy: Never

  volumes:
    - name: skywalking-agent
      emptyDir: {}

  # The copy step runs as an init container so that the agent files are in the
  # shared volume before the application container starts. As a regular
  # container it would start alongside the app, and with restartPolicy Never a
  # JVM that starts before the copy finishes is not retried.
  initContainers:
    - name: agent-container
      image: apache/skywalking-java-agent:8.4.0-alpine
      volumeMounts:
        - name: skywalking-agent
          mountPath: /agent
      command: ["/bin/sh"]
      # Copies /skywalking/agent from the image into the shared volume; the
      # app container sees the result as /skywalking/agent.
      args: ["-c", "cp -R /skywalking/agent /agent/"]

  containers:
    - name: app-container
      image: springio/gs-spring-boot-docker
      volumeMounts:
        - name: skywalking-agent
          mountPath: /skywalking
      env:
        # Points the JVM at the agent jar inside the shared volume.
        - name: JAVA_TOOL_OPTIONS
          value: "-javaagent:/skywalking/agent/skywalking-agent.jar"
ui
UI 初始化时不会显示对应的服务,只有当指标数据导入 SkyWalking(sw)之后才会显示。
示例
sw-ui-es.yaml
skywalking-ui-elasticsearch
# Docker Compose stack: SkyWalking OAP server (Elasticsearch storage), the
# SkyWalking UI, and an OpenTelemetry Collector. All three services join
# skywalking-net so they can reach each other by name.
version: '3'
services:
  oap:
    image: apache/skywalking-oap-server:10.0.1
    container_name: oap
    # restart: always
    # NOTE(review): in list-form entries the double quotes below are not YAML
    # quoting and are presumably passed through as part of the value -
    # confirm that OAP accepts them.
    environment:
      - TZ=Asia/Shanghai
      - SW_STORAGE=elasticsearch
      - SW_NAMESPACE=sw-index
      - SW_STORAGE_ES_CLUSTER_NODES="es.local.org:9123"
      - SW_ES_USER=elastic
      - SW_ES_PASSWORD=cs123456
      - SW_STORAGE_ES_SSL_JKS_PATH="/opt/es.jks"
      - SW_STORAGE_ES_SSL_JKS_PASS=cs123456
      - SW_STORAGE_ES_HTTP_PROTOCOL=https
      - SW_CORE_RECORD_DATA_TTL=3
      - SW_CORE_METRICS_DATA_TTL=7
      # Alternative: switch the storage type to PostgreSQL
      # - SW_STORAGE=postgresql
      # - SW_NAMESPACE=skywalking  # set the namespace as needed
      # - SW_JDBC_URL="jdbc:postgresql://local.org:5433/skywalking"  # PostgreSQL JDBC URL
      # - SW_DATA_SOURCE_USER=postgres  # PostgreSQL user name
      # - SW_DATA_SOURCE_PASSWORD=123456  # PostgreSQL password
      - SW_TELEMETRY=prometheus
      - SW_TELEMETRY_PROMETHEUS_HOST=0.0.0.0
      - SW_TELEMETRY_PROMETHEUS_PORT=1234
      - SW_TELEMETRY_PROMETHEUS_SSL_ENABLED=false
    ports:
      - "11800:11800"  # port that collects monitoring data
      - "12800:12800"  # port that accepts front-end requests
      - "11234:1234"   # host port for the Prometheus telemetry port set above
    extra_hosts:
      - "es.local.org:192.168.122.1"  # add a hostname mapping
    # Healthy once a TCP connection to port 12800 inside the container succeeds.
    healthcheck:
      test: ["CMD", "bash", "-c", "cat < /dev/null > /dev/tcp/127.0.0.1/12800"]
      interval: 10s      # interval between health checks
      timeout: 2s        # timeout of the health-check command
      retries: 3         # number of failed attempts
      start_period: 50s  # wait time before health checks begin
    volumes:
      - '/opt/ELK/elasticsearch-7.17.1/es.local.jks:/opt/es.jks'
      # - '/opt/ELK/elasticsearch-7.17.1/es.jks:/opt/es.jks'
      - '/etc/localtime:/etc/localtime:ro'
    networks:
      - skywalking-net

  oap-ui:
    image: apache/skywalking-ui:10.0.1
    container_name: oap-ui
    restart: always
    environment:
      - TZ=Asia/Shanghai
      # - SW_OAP_ADDRESS=http://192.168.122.1:12800
      - SW_OAP_ADDRESS=http://oap:12800
    ports:
      - "8086:8080"  # change the port mapping as needed
    # Start the UI only after the oap healthcheck reports healthy.
    depends_on:
      oap:
        condition: service_healthy
    volumes:
      - '/etc/localtime:/etc/localtime:ro'
    networks:
      - skywalking-net

  otel-collector:
    image: k8s.org/monitor/opentelemetry-collector-contrib:0.107.0
    container_name: otel
    volumes:
      - '/opt/monitor/opentelemetry/otel-collector-config.yaml:/etc/otelcol-contrib/config.yaml:ro'
    # Port mappings are quoted like those of the other services.
    ports:
      - "1888:1888"    # pprof extension
      - "8888:8888"    # Prometheus metrics exposed by the Collector
      - "8889:8889"    # Prometheus exporter metrics
      - "13133:13133"  # health_check extension http://localhost:13133/health
      - "4317:4317"    # OTLP gRPC receiver
      - "4318:4318"    # OTLP http receiver
      - "56789:55679"  # zpages extension 55679/debug/servicez
    extra_hosts:
      - "local.org:192.168.122.1"  # add a hostname mapping
    # Join the same network as oap; without this the collector lands on the
    # default network, where the service name `oap` does not resolve.
    networks:
      - skywalking-net

networks:
  skywalking-net:
    driver: bridge
exporters sw
otel-sw.yaml
otel-sw.yaml
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# OpenTelemetry Collector configuration: receives OTLP data and forwards
# traces, metrics and logs to the OAP server over OTLP/gRPC.

# Receivers
# Only `otlp` is referenced by the pipelines at the bottom of this file;
# `opencensus` and `prometheus` are defined here but are not part of any
# pipeline as written.
receivers:
  opencensus:
    endpoint: '0.0.0.0:55678'
  otlp:
    protocols:
      grpc:
        endpoint: '0.0.0.0:4317'
      http:
        endpoint: '0.0.0.0:4318'
  prometheus:
    config:
      scrape_configs:
        - job_name: "skywalking-so11y"
          scrape_interval: 8s
          static_configs:
            - targets:
                - "oap:1234"
              labels:
                host_name: 'http://oap:1234'
                service: oap-server
        - job_name: "otel-collector"
          static_configs:
            - targets:
                - "otel:8888"
              labels:
                service: otel-collector
        - job_name: "nginx-monitoring"
          scrape_interval: 10s
          metrics_path: '/metrics'
          static_configs:
            - targets:
                - "local.org:9145"
              labels:
                service: nginx-service
                service_instance_id: e2e-test-instance
        # - job_name: 'apisix-monitoring'
        #   scrape_interval: 10s
        #   metrics_path: "/apisix/prometheus/metrics"
        #   static_configs:
        #     - targets: ['local.org:9091']
        #       labels:
        #         skywalking_service: showcase-apisix-service
        # make sure to use this job name in the vm.yaml to filter only VM metrics
        - job_name: 'vm-monitoring'
          scrape_interval: 10s
          static_configs:
            - targets:
                - 'local.org:9100'
        - job_name: "mysql-monitoring"
          scrape_interval: 5s
          static_configs:
            - targets:
                - "local.org:9104"
              labels:
                host_name: 'local.org:3305'
        - job_name: 'postgresql-monitoring'
          scrape_interval: 5s
          # ["postgres-exporter:9187","postgres-exporter_2:9187","postgres-exporter_3:9187"]
          static_configs:
            - targets:
                - 'local.org:9187'
              labels:
                host_name: 'local.org:5433'
        - job_name: "redis-monitoring"
          scrape_interval: 5s
          static_configs:
            - targets:
                - "local.org:9121"
              labels:
                host_name: 'local.org:9121'
        # - job_name: 'docker-monitoring'
        #   scrape_interval: 10s
        #   metrics_path: "/metrics"
        #   static_configs:
        #     - targets: ['local.org:9188']
        #       labels:
        #         service: docker-service
        #         host_name: local.org

processors:
  batch:
    send_batch_size: 10000
    send_batch_max_size: 10000
    timeout: 10s

# Exporters
exporters:
  ####################################################################################
  # If you want to use otlp exporter please ensure that your OAP version is >= 9.2.0 #
  ####################################################################################
  otlp:
    endpoint: 'oap:11800'
    tls:
      insecure: true
    timeout: 10s
    retry_on_failure:
      enabled: true
      initial_interval: 5s
      max_interval: 20s
      max_elapsed_time: 2m
  debug:

# Extensions
extensions:
  health_check:
    endpoint: '0.0.0.0:13133'
  pprof:
    endpoint: '0.0.0.0:1888'
  zpages:
    endpoint: '0.0.0.0:55679'

service:
  telemetry:
    metrics:
      address: '0.0.0.0:8888'
  extensions:
    - health_check
    - pprof
    - zpages
  pipelines:
    traces:
      receivers:
        - otlp
      processors:
        - batch
      exporters:
        - otlp
        - debug
    metrics:
      # receivers: [otlp,prometheus]
      receivers:
        - otlp
      processors:
        - batch
      exporters:
        - otlp
    logs:
      receivers:
        - otlp
      processors:
        - batch
      exporters:
        - otlp
        - debug
指标采集器
exporter.yaml
exporter
---
# Docker Compose stack of Prometheus exporters (node, PostgreSQL, MySQL,
# Redis). Each exporter publishes its metrics port on the host.
version: "3.8"

services:
  node_exporter:
    container_name: node_exporter
    image: k8s.org/monitor/node-exporter:v1.8.2
    restart: unless-stopped
    # The host root filesystem is mounted read-only at /host and passed to the
    # exporter as its rootfs path.
    volumes:
      - "/:/host:ro,rslave"
    command:
      - "--path.rootfs=/host"
      # - '--collector.filesystem.mount-points-exclude=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/.+)($|/)'
    ports:
      - '9100:9100/tcp'

  postgres-exporter:
    container_name: postgres_exporter
    image: k8s.org/monitor/postgres-exporter:v0.15.0
    extra_hosts:
      - 'local.org:192.168.122.1'  # add a hostname mapping
    environment:
      - DATA_SOURCE_NAME=postgres://postgres:123456@local.org:5433/postgres?sslmode=disable
    ports:
      - '9187:9187/tcp'

  mysqld-exporter:
    container_name: mysqld_exporter
    image: k8s.org/monitor/mysqld-exporter:v0.15.1
    extra_hosts:
      - 'local.org:192.168.122.1'  # add a hostname mapping
    volumes:
      - "./config/mysql.cnf:/opt/my.cnf"
    command:
      - "--mysqld.address=local.org:3305"
      - "--config.my-cnf=/opt/my.cnf"
    ports:
      - '9104:9104/tcp'

  redis-exporter:
    container_name: redis_exporter
    image: k8s.org/monitor/redis_exporter:v1.62.0
    extra_hosts:
      - 'local.org:192.168.122.1'  # add a hostname mapping
    volumes:
      - "./config/redis-pwd.json:/opt/redis-pwd.json"
    command:
      - "--redis.addr=redis://local.org:9736"
      - "--redis.user=exporter"
      - "--redis.password-file=/opt/redis-pwd.json"
      - "--web.listen-address=0.0.0.0:9121"
      - "--web.telemetry-path=/metrics"
    ports:
      - '9121:9121/tcp'