ES系列14:你知道25种(桶聚合)Bucket Aggs 类型各自的使用场景么?【聚合分析】
https://cloud.tencent.com/developer/article/1747353
按protocol聚合
{
"_source": "protocol",
"size": 1,
"aggs": {
"agg_protocols": {
"terms": {
"field": "protocol.raw",
"size": 1000
}
}
}
}
指定地区,按port聚合
{
"query": {
"constant_score": {
"filter": {
"bool": {
"must": [
{
"bool": {
"filter": {
"term": {
"geoip.country_code2.raw": "ID"
}
}
}
}
]
}
}
}
},
"size": 0,
"aggs": {
"agg_port": {
"terms": {
"field": "port",
"size": 9999
}
}
}
}
指定地区和时间段,按ip聚合(独立ip 即ip去重)
{
"query": {
"constant_score": {
"filter": {
"bool": {
"must": [
{
"bool": {
"filter": {
"term": {
"geoip3.country_code2": "ID"
}
}
}
},
{
"range": {
"lastupdatetime": {
"gte": "2020-01-01 00:00:00",
"lt": "2021-01-01 00:00:00"
}
}
}
]
}
}
}
},
"_source": ["ip", "port", "protocol"],
"size": 0,
"aggs": {
"distinct_ips": {
"cardinality": {
"field": "ip",
"precision_threshold": 40000
}
}
}
}
并且
port=80 并且country_code2=VN , 结果按独立ip聚合
例一:
{
"query": {
"constant_score": {
"filter": {
"bool": {
"must": [
{
"bool": {
"filter": {
"term": {
"port": "80"
}
}
}
},
{
"bool": {
"filter": {
"term": {
"geoip2.country_code2": "VN"
}
}
}
}
]
}
}
}
},
"_source": [],
"size": 1,
"aggs": {
"distinct_ips": {
"cardinality": {
"field": "ip",
"precision_threshold": 40000
}
}
}
}
例二:
(ip=”106.75.96.205” && port=8082) && (product=”其他基础软件” ||product=”NGINX” )
{
"query": {
"bool": {
"must": [
{
"term": {
"ip.ip_raw": "106.75.96.205"
}
},
{
"term": {
"port.port_raw": "8082"
}
},
{
"bool": {
"should": [
{
"term": {
"product.raw": "其他基础软件"
}
},
{
"term": {
"product.raw": "NGINX"
}
}
]
}
}
],
"must_not": [],
"should": []
}
},
"from": 0,
"size": 10,
"sort": [],
"aggs": {}
}
例三:
(ip=”106.75.96.205” && port=8082) && (product=”其他基础软件” ||product=”NGINX” )
{
"query": {
"bool": {
"must": [
{
"term": {
"ip.ip_raw": "106.75.96.205"
}
},
{
"term": {
"port.port_raw": "8082"
}
},
{
"terms": {
"product.raw": [
"其他基础软件",
"NGINX"
]
}
}
],
"must_not": [],
"should": []
}
},
"from": 0,
"size": 10,
"sort": [],
"aggs": {}
}
product是个数组:
例四:
port=80 && (geoip3.country_code2=”ID” || geoip3.country_code2=”VN”) , 结果按独立ip聚合
cardinality : 独立IP数
{
"query": {
"constant_score": {
"filter": {
"bool": {
"must": [
{
"bool": {
"filter": {
"term": {
"port": "80"
}
}
}
},
{
"bool": {
"should": [
{
"match": {
"geoip3.country_code2": "ID"
}
},
{
"match": {
"geoip3.country_code2": "VN"
}
}
]
}
}
]
}
}
}
},
"_source": [
"ip",
"port",
"geoip3"
],
"size": 2,
"aggs": {
"distinct_ips": {
"cardinality": {
"field": "ip",
"precision_threshold": 40000
}
}
}
}
聚合再求独立ip数
{
"query": {
"constant_score": {
"filter": {
"bool": {
"must": [
{
"bool": {
"should": [
{
"term": {
"asn.as_organization": "Sen"
}
},
{
"term": {
"asn.as_organization": "Tt"
}
}
]
}
}
]
}
}
}
},
"_source": [
"ip"
],
"size": 0,
"aggs": {
"agg_country_name": {
"terms": {
"field": "geoip.country_name.raw",
"size": 10
},
"aggs": {
"agg_country_name_ip": {
"cardinality": {
"field": "ip"
}
}
}
}
}
}
效果:
cardinality 即去重计算,类似sql中 count(distinct),先去重再求和,计算指定field值的种类数。
例子:
https://blog.csdn.net/u010503427/article/details/81592468
参考:
https://www.jianshu.com/p/1b430a637971
nested 聚合
GET assets/_search
{
"size": 0,
"aggs": {
"rule_tags": {
"nested": {
"path": "rule_tags"
},
"aggs": {
"group_by_nested": {
"terms": {
"field": "rule_tags.products",
"size": 10
}
}
}
}
}
, "track_total_hits": true
}
结果:
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 10,
"successful" : 10,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 112537,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"rule_tags" : {
"doc_count" : 37524,
"group_by_nested" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [ ]
}
}
}
}
例一:聚合后将聚合结果进行分页的解决办法
原数据:
{
"took":0,
"timed_out":false,
"_shards":{
"total":6,
"successful":6,
"skipped":0,
"failed":0
},
"hits":{
"total":{
"value":25,
"relation":"eq"
},
"max_score":1,
"hits":[
{
"_index":"certdata-domaindetection",
"_type":"_doc",
"_id":"d4eea6ea5ee801301f11b1bf68770a2e",
"_score":1,
"_source":{
"beginAt":"2022-10-18 21:58:12",
"data":{
"ipv4":[
"66.102.251.24"
],
"spentTime":76
},
"endAt":"2022-10-18 21:59:55",
"errorMsg":"",
"nodeID":"domainDetection_1",
"parentTaskID":"1019",
"storageAt":"2022-10-18 21:59:55",
"success":false,
"targetHost":"sina.com",
"taskID":"1879969334"
}
}
]
}
}
query
POST certdata-domaindetection/_search
{
"query":{
"bool":{
"must":[
{
"term":{
"data.ipv4":{
"value":"47.103.24.173"
}
}
}
]
}
},
"size":1,
"aggs":{
"agg_targetHost":{
"terms":{
"field":"targetHost",
"size":999999
},
"aggs":{
"myBucketSort":{
"bucket_sort":{
"from":0,
"size":2,
"gap_policy":"skip"
}
}
}
},
"termsCount":{
"cardinality":{
"field":"targetHost",
"precision_threshold":30000
}
}
}
}
结果:
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 6,
"successful" : 6,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 2.140244,
"hits" : [
{
"_index" : "certdata-domaindetection",
"_type" : "_doc",
"_id" : "a1db3d40a4088d61c7526cc272fa1b82",
"_score" : 2.140244,
"_source" : {
"beginAt" : "2022-10-19 18:06:47",
"data" : {
"ipv4" : [
"139.159.241.37",
"8.134.50.24",
"47.103.24.173",
"119.3.70.188",
"120.92.78.97"
],
"spentTime" : 41
},
"endAt" : "2022-10-19 18:08:22",
"nodeID" : "domainDetection_1",
"parentTaskID" : "1031",
"storageAt" : "2022-10-19 18:08:23",
"targetHost" : "bilibili.com",
"taskID" : "1879969356"
}
}
]
},
"aggregations" : {
"agg_targetHost" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "bilibili.com",
"doc_count" : 2
}
]
},
"termsCount" : {
"value" : 1
}
}
}
go代码:
type AggStatKeyStrMap map[string]int64
// blockChainStatWithParamInput ... 入参
type blockChainStatWithParamInput struct {
model.PublicChainType
Status model.BlockchainStatus
Country []string
Province string
City string
Page int
Size int
}
/*
按搜索条件聚合指定字段
isStatusSearch 是否按状态搜索
aggField 聚合的字段
*/
func (d *blockChainService) GetBlockChainStatWithParam(ctx context.Context, in blockChainStatWithParamInput, isStatusSearch bool, aggField string) (aggStatMap model.AggStatKeyStrMap, count int64, err error) {
ctx, span := gtrace.NewSpan(ctx, "GetBlockChainStatWithParam")
defer span.End()
span.SetAttributes(
attribute.String("GetBlockChainStatWithParam.AggField", aggField),
)
query := elasticCore.NewBoolQuery()
if in.Type > 0 {
query.Must(elasticCore.NewTermQuery("type", in.Type))
}
if isStatusSearch {
query.Must(elasticCore.NewTermQuery("status", in.Status))
}
if len(in.Country) > 0 {
tmpInter := make([]interface{}, 0)
for _, v := range in.Country {
tmpInter = append(tmpInter, v)
}
query.Must(elasticCore.NewTermsQuery("country", tmpInter...))
}
if len(in.Province) > 0 {
query.Must(elasticCore.NewTermQuery("province", in.Province))
}
if len(in.City) > 0 {
query.Must(elasticCore.NewTermQuery("city", in.City))
}
sourceQ, _ := query.Source()
fofacore.PrintQuery(sourceQ)
//该单位备案的其他域名
pageIndex := 1
if in.Page != 0 {
pageIndex = in.Page
}
pageSize := 10
if in.Size != 0 {
pageSize = in.Size
}
ctgAgg := elasticCore.NewTermsAggregation().Field(aggField)
source, _ := ctgAgg.Source()
marshalSource, _ := json.Marshal(source)
glog.Debugf(ctx, "GetBlockChainAddrStat source:%v \n", string(marshalSource))
span.SetAttributes(
attribute.String("ast.EsIndexName", string(ast.EsBlockchainNodedetect)),
)
// 获取去重的字段数量(通用)
cardinalityAgg := elasticCore.NewCardinalityAggregation().
Field(aggField).
PrecisionThreshold(3000)
//Rehash(true)
//打印query语句
sourceCardinalityAgg, _ := cardinalityAgg.Source()
fofacore.PrintQuery(sourceCardinalityAgg)
res, err := EsClient(string(ast.EsBlockchainNodedetect)).
Query(query).
From((pageIndex-1)*pageSize).
Size(pageSize).
Aggregation("ctg", ctgAgg).Pretty(true).
Aggregation("cardinalityCount", cardinalityAgg).
Do(context.Background())
if err != nil {
glog.Errorf(ctx, "[GetBlockChainAddrStat] es error: %s", err)
return
}
// 去重后的总数
term, _ := res.Aggregations.Cardinality("cardinalityCount")
for _, bucket := range term.Aggregations {
b, _ := bucket.MarshalJSON()
t, _ := strconv.Atoi(string(b))
count = int64(t)
}
if count == 0 {
return
}
ctg, found := res.Aggregations.Terms("ctg")
if !found {
glog.Errorf(ctx, "[GetBlockChainAddrStat] es can not get res")
return
}
if len(ctg.Buckets) == 0 {
return
}
var aggStatKeyStrMap = make(model.AggStatKeyStrMap, 0)
for _, item := range ctg.Buckets {
aggStatKeyStrMap[item.Key.(string)] = item.DocCount
}
return aggStatKeyStrMap, count, nil
}
例二:聚合后将聚合结果进行分页的解决办法
数据源
{
"took":2,
"timed_out":false,
"_shards":{
"total":4,
"successful":4,
"skipped":0,
"failed":0
},
"hits":{
"total":{
"value":1,
"relation":"eq"
},
"max_score":6.5279574,
"hits":[
{
"_index":"metadata",
"_type":"_doc",
"_id":"120.132.118.50:443",
"_score":6.5279574,
"_source":{
"id":"120.132.118.50:443",
"create_at":"2023-05-08 15:39:15",
"update_at":"2023-05-08 15:39:15",
"type":"metadata",
"asset_type":"",
"ip":"120.132.118.50",
"port":443,
"protocol":"https",
"base_protocol":"tcp",
"geoip":{
"isp":"",
"city":"",
"region":"",
"country":"CN",
"country_name":"中国",
"continent":"",
"timezone":"",
"longitude":113.722,
"latitude":34.7732
},
"host":"https://gcreat.panelook.com",
"hostnames":[
"https://gcreat.panelook.com"
],
"domain":[
"panelook.com"
],
"icp":"",
"subdomain":[
"panelook.com"
],
"os":[
"windows"
],
"server":"Apache/2.4.39 (Win64) OpenSSL/1.1.1b mod_fcgid/2.3.9a mod_log_rotate/1.02",
"title":"",
"header":"header test",
"banner":"",
"body":"",
"cert":"123123123",
"certs":{
"cert_date":"0",
"cert_num":0,
"domain":"",
"is_valid":true,
"issuer_cn":"",
"issuer_cns":[
],
"issuer_org":"",
"not_after":"0",
"not_before":"0",
"sig_alth":"",
"sn":"",
"subject_cn":"",
"subject_cns":[
],
"subject_org":"",
"valid_type":""
},
"critical":null,
"asn":{
"number":"56040",
"organization":"China Mobile communications corporation"
},
"products":[
{
"product":"mod_fcgid",
"level":0,
"category":"其他支撑系统",
"parent_category":"",
"softhard":"",
"company":"",
"version":""
},
{
"product":"OpenSSL",
"level":0,
"category":"数据证书",
"parent_category":"",
"softhard":"",
"company":"",
"version":""
},
{
"product":"Google-Analytics",
"level":0,
"category":"其他企业应用",
"parent_category":"",
"softhard":"",
"company":"",
"version":""
},
{
"product":"php",
"level":0,
"category":"脚本语言",
"parent_category":"",
"softhard":"",
"company":"",
"version":""
},
{
"product":"WIN64",
"level":0,
"category":"服务",
"parent_category":"",
"softhard":"",
"company":"",
"version":""
},
{
"product":"RapidSSL-TLS-DV-RSA-Mixed-SHA256-2020-CA",
"level":0,
"category":"其他企业应用",
"parent_category":"",
"softhard":"",
"company":"",
"version":""
},
{
"product":"APACHE-HTTP_Server",
"level":0,
"category":"服务",
"parent_category":"",
"softhard":"",
"company":"",
"version":""
},
{
"product":"RapidSSL-Cert",
"level":0,
"category":"数据证书",
"parent_category":"",
"softhard":"",
"company":"",
"version":""
},
{
"product":"digicert-Cert",
"level":0,
"category":"数据证书",
"parent_category":"",
"softhard":"",
"company":"",
"version":""
},
{
"product":"RapidSSL-TLS-DV-RSA-Mixed-SHA256-2020-CA-1",
"level":0,
"category":"其他企业应用",
"parent_category":"",
"softhard":"",
"company":"",
"version":""
}
],
"dbs":null,
"favicon":{
"base64":"",
"hash":"",
"url":""
},
"vulnerabilities":null
}
}
]
},
"aggregations":{
"host":{
"doc_count_error_upper_bound":0,
"sum_other_doc_count":0,
"buckets":[
{
"key":"https://gcreat.panelook.com",
"doc_count":1
}
]
}
}
}
DSL
GET metadata/_search
{
"size": 0,
"query": {
"constant_score": {
"filter": {
"bool": {
"must": [
{
"range": {
"update_at": {
"from": "now-5y",
"include_lower": true,
"include_upper": true,
"to": null
}
}
},
{
"nested": {
"path": "vulnerabilities",
"query": {
"bool": {
"must": {
"exists": {
"field": "vulnerabilities"
}
}
}
}
}
}
]
}
},
"boost": 1.2
}
},
"aggs": {
"vGroup": {
"nested": {
"path": "products"
},
"aggs": {
"filterField": {
"filter": {
"bool": {
"must_not": [
{
"term": {
"products.proudct": {
"value": ""
}
}
}
]
}
},
"aggs": {
"product": {
"terms": {
"field": "products.product",
"order": [
{
"_count": "desc"
}
],
"size": 99999
},
"aggs": {
"myBucketSort": {
"bucket_sort": {
"from": 0,
"size": 10,
"gap_policy": "skip"
}
}
}
}
}
},
"cardinalityCount": {
"cardinality": {
"field": "products.product",
"precision_threshold": 30000
}
}
}
}
}
}
结果:
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 4,
"successful" : 4,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 10,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"vGroup" : {
"meta" : { },
"doc_count" : 13,
"filterField" : {
"meta" : { },
"doc_count" : 13,
"product" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "Alt-Svc",
"doc_count" : 1
},
{
"key" : "CLOUDFLARE-产品",
"doc_count" : 1
},
{
"key" : "Eggdig",
"doc_count" : 1
},
{
"key" : "Fathercook",
"doc_count" : 1
},
{
"key" : "Horrorhas",
"doc_count" : 1
},
{
"key" : "Hostwill",
"doc_count" : 1
},
{
"key" : "Llamadoes",
"doc_count" : 1
},
{
"key" : "Microsoft-ASP",
"doc_count" : 1
},
{
"key" : "Microsoft-ASP.NET",
"doc_count" : 1
},
{
"key" : "Milkdream",
"doc_count" : 1
}
]
}
},
"cardinalityCount" : {
"value" : 13
}
}
}
}
子聚合
例子:
POST test/_search
{
"size": 0,
"aggs": {
"agg_country_protocol_type": {
"terms": {
"field": "geoip.country_name.raw",
"size": 999999,
"order": {
"_key": "asc"
}
},
"aggs": {
"protocol_agg": {
"terms": {
"field": "protocol.raw",
"size": 999999,
"order": {
"_key": "asc"
}
}
}
}
},
"agg_compony_type": {
"terms": {
"field": "company.keyword",
"size": 999999,
"order": {
"_key": "asc"
}
},
"aggs": {
"agg_compony_protocol": {
"terms": {
"field": "protocol.raw",
"size": 999999,
"order": {
"_key": "asc"
}
}
}
}
}
}
}
获取某分组条件下所有桶的指定度量的和
https://www.cnblogs.com/nysd/p/12858355.html
ElasticSearch 聚合查询(求和、最大值、最小值、平均值、去重、百分比、占比、中位数、topN、分组聚合)
https://blog.csdn.net/feizuiku0116/article/details/123141352
搜索country_code 为中国的
{
"bool":{
"should":{
"terms":{
"geoip.country_code2.raw":[
"CN",
"TW",
"HK",
"MO"
]
}
}
}
}
作者:海马 创建时间:2023-03-19 11:09
最后编辑:admin 更新时间:2024-12-22 19:32
最后编辑:admin 更新时间:2024-12-22 19:32