Building a Large-Scale Log Analysis Platform with ELK + Filebeat + Kafka + ZooKeeper
Why build a log analysis platform? As business volume grows, the application servers produce hundreds of millions of log lines per day, and a single log file can reach several GB. At that scale the stock Linux tools (cat, grep, awk) can no longer keep up, and beyond the server logs there are also application error logs spread across different machines, which makes looking anything up tedious. The pain points to solve: 1. a large volume of logs of many different kinds has become a burden on the operations team and is hard to manage; 2. the logs are scattered across many servers, so troubleshooting a fault means logging in to each machine in turn.
The server roles in this deployment:

| IP | Role | Cluster |
| --- | --- | --- |
| 10.10.1.2 | Application server + Filebeat | Application server cluster |
| 10.10.1.30 | Logstash + Kafka + ZooKeeper | Kafka / ZooKeeper cluster |
| 10.10.1.31 | Logstash + Kafka + ZooKeeper | Kafka / ZooKeeper cluster |
| 10.10.1.32 | Kafka + ZooKeeper | Kafka / ZooKeeper cluster |
| 10.10.1.50 | Logstash | Data forwarding |
| 10.10.1.60 | ES DataNode | ES cluster |
| 10.10.1.90 | ES DataNode | ES cluster |
| 10.10.1.244 | ES Master + Kibana | ES cluster |
First, set up the Elasticsearch cluster. On each ES node (10.10.1.244, 10.10.1.60, 10.10.1.90), install the JDK and Elasticsearch from the RPMs:

```bash
# Install
yum install jdk-8u101-linux-x64.rpm elasticsearch-2.3.4.rpm -y
# ES is installed under /usr/share/elasticsearch/ by default
```
Next, raise the file descriptor limits and set the JVM heap:

```bash
# System-wide maximum number of open file descriptors
vim /etc/sysctl.conf
fs.file-max=65535

# Per-process maximum number of open file descriptors
vim /etc/security/limits.conf
# End of file
* soft nofile 65535
* hard nofile 65535

# JVM heap size
vim /etc/sysconfig/elasticsearch
ES_HEAP_SIZE=4g    # this machine has 8 GB of RAM
```
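To make the kernel setting take effect without a reboot and confirm the new limits, a quick check (not part of the original steps, but standard practice) is:

```bash
# Reload sysctl settings and verify the file-max value
sysctl -p
cat /proc/sys/fs/file-max
# Log in again so limits.conf applies, then check the per-process limit
ulimit -n
```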
Configure the master node (10.10.1.244):

```yaml
# /etc/elasticsearch/elasticsearch.yml
# ---------------------------------- Cluster -----------------------------------
# Use a descriptive name for your cluster:
cluster.name: bigdata
# ------------------------------------ Node ------------------------------------
node.name: server1
node.master: true
node.data: false
# ----------------------------------- Index ------------------------------------
index.number_of_shards: 5
index.number_of_replicas: 0
index.refresh_interval: 120s
# ----------------------------------- Paths ------------------------------------
path.data: /home/elk/data
path.logs: /var/log/elasticsearch/elasticsearch.log
# ----------------------------------- Memory -----------------------------------
bootstrap.mlockall: true
indices.fielddata.cache.size: 50mb
# ------------------------------ Network And HTTP ------------------------------
network.host: 0.0.0.0
http.port: 9200
# ---------------------------------- Translog ----------------------------------
index.translog.flush_threshold_ops: 50000
# ---------------------------------- Discovery ---------------------------------
discovery.zen.minimum_master_nodes: 1
discovery.zen.ping.timeout: 200s
discovery.zen.fd.ping_timeout: 200s
discovery.zen.fd.ping.interval: 30s
discovery.zen.fd.ping.retries: 6
discovery.zen.ping.unicast.hosts: ["10.10.1.60:9300", "10.10.1.90:9300", "10.10.1.244:9300"]
discovery.zen.ping.multicast.enabled: false
# ------------------------------------ Merge -----------------------------------
indices.store.throttle.max_bytes_per_sec: 100mb
```
Install a few handy management plugins (optional):

```bash
# head
/usr/share/elasticsearch/bin/plugin install mobz/elasticsearch-head
# kopf
/usr/share/elasticsearch/bin/plugin install lmenezes/elasticsearch-kopf
# bigdesk
/usr/share/elasticsearch/bin/plugin install hlstudio/bigdesk
```
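These are site plugins, served by ES itself once it is running; this check is not in the original write-up, but assuming the default HTTP port, head should be reachable like so:

```bash
# Open in a browser, or probe from the shell
curl -s http://10.10.1.244:9200/_plugin/head/ | head
```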
Configure the first data node (10.10.1.60):

```yaml
# ---------------------------------- Cluster -----------------------------------
# Use a descriptive name for your cluster:
cluster.name: bigdata
# ------------------------------------ Node ------------------------------------
node.name: server2
node.master: false
node.data: true
# ----------------------------------- Index ------------------------------------
index.number_of_shards: 5
index.number_of_replicas: 0
index.refresh_interval: 120s
# ----------------------------------- Paths ------------------------------------
path.data: /home/elk/data,/disk2/elk/data2
path.logs: /var/log/elasticsearch/elasticsearch.log
# ----------------------------------- Memory -----------------------------------
bootstrap.mlockall: true
indices.fielddata.cache.size: 50mb
# ------------------------------ Network And HTTP ------------------------------
network.host: 0.0.0.0
http.port: 9200
# ---------------------------------- Translog ----------------------------------
index.translog.flush_threshold_ops: 50000
# ---------------------------------- Discovery ---------------------------------
discovery.zen.minimum_master_nodes: 1
discovery.zen.ping.timeout: 200s
discovery.zen.fd.ping_timeout: 200s
discovery.zen.fd.ping.interval: 30s
discovery.zen.fd.ping.retries: 6
discovery.zen.ping.unicast.hosts: ["10.10.1.244:9300"]
discovery.zen.ping.multicast.enabled: false
# ------------------------------------ Merge -----------------------------------
indices.store.throttle.max_bytes_per_sec: 100mb
```
And the second data node (10.10.1.90):

```yaml
# ---------------------------------- Cluster -----------------------------------
# Use a descriptive name for your cluster:
cluster.name: bigdata
# ------------------------------------ Node ------------------------------------
node.name: server3
node.master: false
node.data: true
# ----------------------------------- Index ------------------------------------
index.number_of_shards: 5
index.number_of_replicas: 0
index.refresh_interval: 120s
# ----------------------------------- Paths ------------------------------------
path.data: /home/elk/single
path.logs: /var/log/elasticsearch/elasticsearch.log
# ----------------------------------- Memory -----------------------------------
bootstrap.mlockall: true
indices.fielddata.cache.size: 50mb
# ------------------------------ Network And HTTP ------------------------------
network.host: 0.0.0.0
http.port: 9200
# ---------------------------------- Translog ----------------------------------
index.translog.flush_threshold_ops: 50000
# ---------------------------------- Discovery ---------------------------------
discovery.zen.minimum_master_nodes: 1
discovery.zen.ping.timeout: 200s
discovery.zen.fd.ping_timeout: 200s
discovery.zen.fd.ping.interval: 30s
discovery.zen.fd.ping.retries: 6
discovery.zen.ping.unicast.hosts: ["10.10.1.244:9300"]
discovery.zen.ping.multicast.enabled: false
# ------------------------------------ Merge -----------------------------------
indices.store.throttle.max_bytes_per_sec: 100mb
```
Start Elasticsearch on all three nodes:

```bash
# 10.10.1.244
/etc/init.d/elasticsearch start
# 10.10.1.60
/etc/init.d/elasticsearch start
# 10.10.1.90
/etc/init.d/elasticsearch start
```
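A quick way to confirm the cluster has formed (a standard check, not from the original steps):

```bash
# Expect "number_of_nodes": 3 and status green or yellow
curl -s http://10.10.1.244:9200/_cluster/health?pretty
# Per-node view: which node is master (*), heap usage, etc.
curl -s http://10.10.1.244:9200/_cat/nodes?v
```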
With ES up, move on to the ZooKeeper cluster (10.10.1.30, 10.10.1.31, 10.10.1.32):

```bash
# ZooKeeper depends on Java; install the JDK if it is not already present
rpm -ivh jdk-8u101-linux-x64.rpm
# Unpack the distribution
tar xf zookeeper-3.4.9.tar.gz
```
Edit conf/zoo.cfg:

```properties
# conf/zoo.cfg
# The number of milliseconds of each tick
tickTime=2000
# The number of ticks that the initial
# synchronization phase can take
initLimit=10
# The number of ticks that can pass between
# sending a request and getting an acknowledgement
syncLimit=5
# the directory where the snapshot is stored.
# do not use /tmp for storage, /tmp here is just
# example sakes.
dataDir=/u01/zookeeper/zookeeper-3.4.9/data
# the port at which the clients will connect
clientPort=2181
# the maximum number of client connections.
# increase this if you need to handle more clients
#maxClientCnxns=60
server.11=10.10.1.30:2888:3888
server.12=10.10.1.31:2888:3888
server.13=10.10.1.32:2888:3888
# Be sure to read the maintenance section of the
# administrator guide before turning on autopurge.
#
# http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance
#
# The number of snapshots to retain in dataDir
#autopurge.snapRetainCount=3
# Purge task interval in hours
# Set to "0" to disable auto purge feature
#autopurge.purgeInterval=1
```
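Make sure the snapshot directory exists before starting, using the dataDir configured above (adjust the path if yours differs):

```bash
mkdir -p /u01/zookeeper/zookeeper-3.4.9/data
```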
Copy the config to the other two nodes:

```bash
scp zoo.cfg 10.10.1.31:/usr/local/zookeeper-3.4.9/conf/
scp zoo.cfg 10.10.1.32:/usr/local/zookeeper-3.4.9/conf/
```
Write each node's myid; the value must match the server.N entries in zoo.cfg, and the file must live inside the configured dataDir:

```bash
# 10.10.1.30
echo 11 > /usr/local/zookeeper-3.4.9/data/myid
# 10.10.1.31
echo 12 > /usr/local/zookeeper-3.4.9/data/myid
# 10.10.1.32
echo 13 > /usr/local/zookeeper-3.4.9/data/myid
```
Start ZooKeeper on each node and check the role:

```bash
# 10.10.1.30
bin/zkServer.sh start
bin/zkServer.sh status
ZooKeeper JMX enabled by default
Using config: /usr/local/zookeeper/zookeeper-3.4.9/bin/../conf/zoo.cfg
Mode: leader
# 10.10.1.31
bin/zkServer.sh start
bin/zkServer.sh status
ZooKeeper JMX enabled by default
Using config: /usr/local/zookeeper/zookeeper-3.4.9/bin/../conf/zoo.cfg
Mode: follower
# 10.10.1.32
bin/zkServer.sh start
bin/zkServer.sh status
ZooKeeper JMX enabled by default
Using config: /usr/local/zookeeper/zookeeper-3.4.9/bin/../conf/zoo.cfg
Mode: follower
```
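ZooKeeper's four-letter commands give another quick health check (standard commands, not from the original article):

```bash
# "imok" means the server is up
echo ruok | nc 10.10.1.30 2181
# Connection counts, latency and quorum role
echo stat | nc 10.10.1.30 2181
```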
Kafka runs on the same three nodes:

```bash
# Unpack the distribution
tar xf kafka_2.11-0.10.0.1.tgz
```
Edit config/server.properties (shown here for 10.10.1.30):

```properties
############################# Server Basics #############################
broker.id=1
############################# Socket Server Settings #############################
num.network.threads=3
# The number of threads doing disk I/O
num.io.threads=8
# The send buffer (SO_SNDBUF) used by the socket server
socket.send.buffer.bytes=102400
# The receive buffer (SO_RCVBUF) used by the socket server
socket.receive.buffer.bytes=102400
# The maximum size of a request that the socket server will accept (protection against OOM)
socket.request.max.bytes=104857600
############################# Log Basics #############################
log.dirs=/usr/local/kafka/kafka_2.11-0.10.0.1/data
num.partitions=6
num.recovery.threads.per.data.dir=1
############################# Log Flush Policy #############################
# The number of messages to accept before forcing a flush of data to disk
#log.flush.interval.messages=10000
# The maximum amount of time a message can sit in a log before we force a flush
#log.flush.interval.ms=1000
############################# Log Retention Policy #############################
log.retention.hours=60
log.segment.bytes=1073741824
log.retention.check.interval.ms=300000
############################# Zookeeper #############################
zookeeper.connect=10.10.1.30:2181,10.10.1.31:2181,10.10.1.32:2181
zookeeper.connection.timeout.ms=6000
```
Copy the config to the other two nodes and adjust broker.id, which must be unique per broker:

```bash
scp server.properties 10.10.1.31:/usr/local/kafka/kafka_2.11-0.10.0.1/config/
scp server.properties 10.10.1.32:/usr/local/kafka/kafka_2.11-0.10.0.1/config/
# Change broker.id on each node
# 10.10.1.31
broker.id=2
# 10.10.1.32
broker.id=3
```
Add host entries on all three Kafka nodes:

```bash
vim /etc/hosts
10.10.1.30 server1
10.10.1.31 server2
10.10.1.32 server3
# Remember to sync this file to the other two nodes
```
Start the brokers:

```bash
bin/kafka-server-start.sh config/server.properties
# Start the other two nodes the same way
```
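A smoke test with the stock CLI tools (0.10 syntax; not in the original article) confirms the brokers can elect partition leaders. The topic name matches the one Logstash will use later; the replication factor here is illustrative:

```bash
# Create the pipeline's topic (with default settings it would also be auto-created)
bin/kafka-topics.sh --create --zookeeper 10.10.1.30:2181 \
  --replication-factor 2 --partitions 6 --topic peiyinlog
# Check partition leaders and ISR across the three brokers
bin/kafka-topics.sh --describe --zookeeper 10.10.1.30:2181 --topic peiyinlog
```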
Next, Logstash. The forwarding instance (10.10.1.50 in the topology table) receives events from Filebeat and produces them to Kafka:

```bash
# Unpack the distribution
tar xf logstash-2.3.2.tar.gz
```
Its pipeline config, logstash_in_kafka.conf:

```ruby
# logstash_in_kafka.conf
input {
    beats {
        port => 5044
        codec => "json"
    }
}

filter {
    if [type] == "nginxacclog" {
        geoip {
            source => "clientip"   # must match the key holding the client address in the log
            target => "geoip"
            database => "/usr/local/logstash/GeoLiteCity.dat"
            add_field => [ "[geoip][coordinates]", "%{[geoip][longitude]}" ]
            add_field => [ "[geoip][coordinates]", "%{[geoip][latitude]}" ]
        }
        mutate {
            convert => [ "[geoip][coordinates]", "float" ]
        }
    }
}

output {
    kafka {
        workers => 2
        bootstrap_servers => "10.10.1.30:9092,10.10.1.31:9092,10.10.1.32:9092"
        topic_id => "peiyinlog"
    }
}
```
Start it:

```bash
/usr/local/logstash/bin/logstash agent -f logstash_in_kafka.conf &
```
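Logstash 2.x can validate a pipeline file before launch, a step the original omits but which is worth doing after every edit:

```bash
# Syntax-check the config; prints "Configuration OK" on success
/usr/local/logstash/bin/logstash agent -f logstash_in_kafka.conf --configtest
```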
On the business servers, have nginx write access logs as JSON so nothing downstream needs grok parsing. Note that nginx variable names are lowercase, e.g. $http_app_version for the App-Version request header:

```nginx
log_format json '{"@timestamp":"$time_iso8601",'
                '"slbip":"$remote_addr",'
                '"clientip":"$http_x_forwarded_for",'
                '"serverip":"$server_addr",'
                '"size":$body_bytes_sent,'
                '"responsetime":$request_time,'
                '"domain":"$host",'
                '"method":"$request_method",'
                '"requesturi":"$request_uri",'
                '"url":"$uri",'
                '"appversion":"$http_app_version",'
                '"referer":"$http_referer",'
                '"agent":"$http_user_agent",'
                '"status":"$status",'
                '"devicecode":"$http_ha"}';

# Reference it in the virtual host configuration
access_log /alidata/log/nginx/access/access.log json;
```
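After changing nginx configuration, test and reload (standard nginx commands):

```bash
nginx -t && nginx -s reload
```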
Install Filebeat on the business servers:

```bash
# Install from the rpm package
yum install filebeat-1.2.3-x86_64.rpm -y
```
Configure /etc/filebeat/filebeat.yml. Each prospector tags its log file with a document_type, which is what the downstream Logstash filters switch on:

```yaml
################### Filebeat Configuration Example #########################
############################# Filebeat ######################################
filebeat:
  prospectors:
    -
      paths:
        - /var/log/messages
      input_type: log
      document_type: messages
    -
      paths:
        - /alidata/log/nginx/access/access.log
      input_type: log
      document_type: nginxacclog
    -
      paths:
        - /alidata/www/logs/laravel.log
      input_type: log
      document_type: larlog
    -
      paths:
        - /alidata/www/logs/500_error.log
      input_type: log
      document_type: peiyinlar_500error
    -
      paths:
        - /alidata/www/logs/deposit.log
      input_type: log
      document_type: lar_deposit
    -
      paths:
        - /alidata/www/logs/call_error.log
      input_type: log
      document_type: call_error
    -
      paths:
        - /alidata/log/php/php-fpm.log.slow
      input_type: log
      document_type: phpslowlog
      multiline:
        pattern: '^[[:space:]]'
        negate: true
        match: after
  registry_file: /var/lib/filebeat/registry
############################# Output ##########################################
output:
  logstash:
    # The forwarding Logstash (10.10.1.50 in the topology table)
    hosts: ["10.10.1.50:5044"]
############################# Shipper #########################################
shipper:
  name: "host_6"
############################# Logging #########################################
logging:
  files:
    rotateeverybytes: 10485760 # = 10MB
```
Start Filebeat:

```bash
/etc/init.d/filebeat start
```
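At this point events should be flowing into Kafka; a console consumer (0.10 syntax, not in the original steps) makes that easy to verify:

```bash
# Run from the Kafka install directory; JSON access-log events should scroll by
bin/kafka-console-consumer.sh --zookeeper 10.10.1.30:2181 --topic peiyinlog
```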
On the consumer side, the Logstash instances co-located with Kafka (10.10.1.30/31) read from the topic and index into Elasticsearch. Note the conditionals match the document_type values set in filebeat.yml:

```ruby
# kafka_to_es.conf
input {
    kafka {
        zk_connect => "10.10.1.30:2181,10.10.1.31:2181,10.10.1.32:2181"
        group_id => "logstash"
        topic_id => "peiyinlog"
        reset_beginning => false
        consumer_threads => 50
        decorate_events => true
    }
}

# Drop fields we don't need
filter {
    if [type] == "nginxacclog" {
        mutate {
            remove_field => ["slbip","kafka","domain","serverip","url","@version","offset","input_type","count","source","fields","beat.hostname","host","tags"]
        }
    }
}

output {
    if [type] == "nginxacclog" {
        # stdout { codec => rubydebug }
        elasticsearch {
            hosts => ["10.10.1.90:9200","10.10.1.60:9200"]
            index => "logstash-nginxacclog-%{+YYYY.MM.dd}"
            manage_template => true
            flush_size => 50000
            idle_flush_time => 10
            workers => 2
        }
    }
    if [type] == "messages" {
        elasticsearch {
            hosts => ["10.10.1.90:9200","10.10.1.60:9200"]
            index => "logstash-messages-%{+YYYY.MM.dd}"
            manage_template => true
            flush_size => 50000
            idle_flush_time => 30
            workers => 1
        }
    }
    if [type] == "larlog" {
        elasticsearch {
            hosts => ["10.10.1.90:9200","10.10.1.60:9200"]
            index => "logstash-larlog-%{+YYYY.MM.dd}"
            manage_template => true
            flush_size => 2000
            idle_flush_time => 10
        }
    }
    if [type] == "lar_deposit" {   # document_type set in filebeat.yml
        elasticsearch {
            hosts => ["10.10.1.90:9200","10.10.1.60:9200"]
            index => "logstash-deposit-%{+YYYY.MM.dd}"
            manage_template => true
            flush_size => 2000
            idle_flush_time => 10
        }
    }
    if [type] == "phpslowlog" {
        elasticsearch {
            hosts => ["10.10.1.90:9200","10.10.1.60:9200"]
            index => "logstash-phpslowlog-%{+YYYY.MM.dd}"
            manage_template => true
            flush_size => 2000
            idle_flush_time => 10
        }
    }
}
```
Start it:

```bash
/usr/local/logstash/bin/logstash agent -f kafka_to_es.conf &
```
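Once this side is running, daily logstash-* indices should start appearing; a quick check with the standard cat API:

```bash
# Index names, doc counts and sizes at a glance
curl -s http://10.10.1.244:9200/_cat/indices?v
```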
Tune the default mapping via an index template: raise the refresh interval and disable the _all field to cut indexing overhead:

```bash
curl -XPUT http://10.10.1.244:9200/_template/logstash2 -d '
{
    "order": 1,
    "template": "logstash-*",
    "settings": {
        "index": {
            "refresh_interval": "120s"
        }
    },
    "mappings": {
        "_default_": {
            "_all": {
                "enabled": false
            }
        }
    }
}'
```
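Verify the template was stored (standard API, not in the original steps):

```bash
curl -s http://10.10.1.244:9200/_template/logstash2?pretty
```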
Finally, Kibana on 10.10.1.244:

```bash
tar xf kibana-4.5.3-linux-x64.tar.gz
# Nothing to install; just unpack it and it is ready to run
```
```yaml
# vim kibana-4.5.3-linux-x64/config/kibana.yml
# Kibana is served by a back end server. This controls which port to use.
server.port: 5601
# The host to bind the server to.
server.host: "0.0.0.0"
# The Elasticsearch instance to use for all your queries.
elasticsearch.url: "http://10.10.1.244:9200"
# These three settings are all that need changing
```
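A typical way to launch it and start exploring (on first load Kibana asks for an index pattern, and logstash-* matches the indices created above):

```bash
cd kibana-4.5.3-linux-x64
nohup ./bin/kibana >/dev/null 2>&1 &
# Then open http://10.10.1.244:5601 in a browser
```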