ES系列14:你知道25种(桶聚合)Bucket Aggs 类型各自的使用场景么?【聚合分析】

https://cloud.tencent.com/developer/article/1747353

按protocol聚合

{
  "_source": "protocol", 
  "size": 1,
  "aggs": {
    "agg_protocols": {
      "terms": {
        "field": "protocol.raw",
        "size": 1000
      }
    }
  }
}

指定地区,按port聚合

{
  "query": {
    "constant_score": {
      "filter": {
        "bool": {
          "must": [
            {
              "bool": {
                "filter": {
                  "term": {
                    "geoip.country_code2.raw": "ID"
                  }
                }
              }
            }
          ]
        }
      }
    }
  },
  "size": 0,
  "aggs": {
    "agg_port": {
      "terms": {
        "field": "port",
        "size": 9999
      }
    }
  }
}

指定地区和时间段,按ip聚合(独立ip 即ip去重)

{
  "query": {
    "constant_score": {
      "filter": {
        "bool": {
          "must": [
            {
              "bool": {
                "filter": {
                  "term": {
                    "geoip3.country_code2": "ID"
                  }
                }
              }
            },
            {
              "range": {
                "lastupdatetime": {
                  "gte": "2020-01-01 00:00:00",
                  "lt": "2021-01-01 00:00:00"
                }
              }
            }
          ]
        }
      }
    }
  },
  "_source": ["ip", "port", "protocol"],
  "size": 0,
  "aggs": {
    "distinct_ips": {
      "cardinality": {
        "field": "ip",
        "precision_threshold": 40000
      }
    }
  }
}

并且

port=80 并且country_code2=VN , 结果按独立ip聚合

例一:

{
  "query": {
    "constant_score": {
      "filter": {
        "bool": {
          "must": [
            {
              "bool": {
                "filter": {
                  "term": {
                    "port": "80"
                  }
                }
              }
            },
            {
              "bool": {
                "filter": {
                  "term": {
                    "geoip2.country_code2": "VN"
                  }
                }
              }
            }
          ]
        }
      }
    }
  },
  "_source": [],
  "size": 1,
  "aggs": {
    "distinct_ips": {
      "cardinality": {
        "field": "ip",
        "precision_threshold": 40000
      }
    }
  }
}

例二:
(ip=”106.75.96.205” && port=8082) && (product=”其他基础软件” ||product=”NGINX” )

{
  "query": {
    "bool": {
      "must": [
        {
          "term": {
            "ip.ip_raw": "106.75.96.205"
          }
        },
        {
          "term": {
            "port.port_raw": "8082"
          }
        },
        {
          "bool": {
            "should": [
              {
                "term": {
                  "product.raw": "其他基础软件"
                }
              },
              {
                "term": {
                  "product.raw": "NGINX"
                }
              }
            ]
          }
        }
      ],
      "must_not": [],
      "should": []
    }
  },
  "from": 0,
  "size": 10,
  "sort": [],
  "aggs": {}
}

例三:
(ip=”106.75.96.205” && port=8082) && (product=”其他基础软件” ||product=”NGINX” )

{
  "query": {
    "bool": {
      "must": [
        {
          "term": {
            "ip.ip_raw": "106.75.96.205"
          }
        },
        {
          "term": {
            "port.port_raw": "8082"
          }
        },
        {
          "terms": {
            "product.raw": [
              "其他基础软件",
              "NGINX"
            ]
          }
        }
      ],
      "must_not": [],
      "should": []
    }
  },
  "from": 0,
  "size": 10,
  "sort": [],
  "aggs": {}
}

product是个数组:

例四:
port=80 && (geoip3.country_code2=”ID” || geoip3.country_code2=”VN”) , 结果按独立ip聚合
cardinality : 独立IP数

{
  "query": {
    "constant_score": {
      "filter": {
        "bool": {
          "must": [
            {
              "bool": {
                "filter": {
                  "term": {
                    "port": "80"
                  }
                }
              }
            },
            {
              "bool": {
                "should": [
                  {
                    "match": {
                      "geoip3.country_code2": "ID"
                    }
                  },
                  {
                    "match": {
                      "geoip3.country_code2": "VN"
                    }
                  }
                ]
              }
            }
          ]
        }
      }
    }
  },
  "_source": [
    "ip",
    "port",
    "geoip3"
  ],
  "size": 2,
  "aggs": {
    "distinct_ips": {
      "cardinality": {
        "field": "ip",
        "precision_threshold": 40000
      }
    }
  }
}

聚合再求独立ip数

{
  "query": {
    "constant_score": {
      "filter": {
        "bool": {
          "must": [
            {
              "bool": {
                "should": [
                  {
                    "term": {
                      "asn.as_organization": "Sen"
                    }
                  },
                  {
                    "term": {
                      "asn.as_organization": "Tt"
                    }
                  }
                ]
              }
            }
          ]
        }
      }
    }
  },
  "_source": [
    "ip"
  ],
  "size": 0,
  "aggs": {
    "agg_country_name": {
      "terms": {
        "field": "geoip.country_name.raw",
        "size": 10
      },
      "aggs": {
        "agg_country_name_ip": {
          "cardinality": {
            "field": "ip"
          }
        }
      }
    }
  }
}

效果:

cardinality 即去重计算,类似sql中 count(distinct),先去重再求和,计算指定field值的种类数。

例子:
https://blog.csdn.net/u010503427/article/details/81592468

参考:
https://www.jianshu.com/p/1b430a637971

nested 聚合

GET assets/_search
{
  "size": 0, 
  "aggs": {
    "rule_tags": {
      "nested": {
        "path": "rule_tags"
      },
      "aggs": {
        "group_by_nested": {
          "terms": {
            "field": "rule_tags.products",
            "size": 10
          }
        }
      }
    }
  }
  , "track_total_hits": true
}

结果:

{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 10,
    "successful" : 10,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 112537,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "rule_tags" : {
      "doc_count" : 37524,
      "group_by_nested" : {
        "doc_count_error_upper_bound" : 0,
        "sum_other_doc_count" : 0,
        "buckets" : [ ]
      }
    }
  }
}

例一:聚合后将聚合结果进行分页的解决办法

原数据:

{
    "took":0,
    "timed_out":false,
    "_shards":{
        "total":6,
        "successful":6,
        "skipped":0,
        "failed":0
    },
    "hits":{
        "total":{
            "value":25,
            "relation":"eq"
        },
        "max_score":1,
        "hits":[
            {
                "_index":"certdata-domaindetection",
                "_type":"_doc",
                "_id":"d4eea6ea5ee801301f11b1bf68770a2e",
                "_score":1,
                "_source":{
                    "beginAt":"2022-10-18 21:58:12",
                    "data":{
                        "ipv4":[
                            "66.102.251.24"
                        ],
                        "spentTime":76
                    },
                    "endAt":"2022-10-18 21:59:55",
                    "errorMsg":"",
                    "nodeID":"domainDetection_1",
                    "parentTaskID":"1019",
                    "storageAt":"2022-10-18 21:59:55",
                    "success":false,
                    "targetHost":"sina.com",
                    "taskID":"1879969334"
                }
            }
        ]
    }
}

query

POST certdata-domaindetection/_search
{
    "query":{
        "bool":{
            "must":[
                {
                    "term":{
                        "data.ipv4":{
                            "value":"47.103.24.173"
                        }
                    }
                }
            ]
        }
    },
    "size":1,
    "aggs":{
        "agg_targetHost":{
            "terms":{
                "field":"targetHost",
                "size":999999
            },
            "aggs":{
                "myBucketSort":{
                    "bucket_sort":{
                        "from":0,
                        "size":2,
                        "gap_policy":"skip"
                    }
                }
            }
        },
        "termsCount":{
            "cardinality":{
                "field":"targetHost",
                "precision_threshold":30000
            }
        }
    }
}

结果:

{
  "took" : 2,
  "timed_out" : false,
  "_shards" : {
    "total" : 6,
    "successful" : 6,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 2,
      "relation" : "eq"
    },
    "max_score" : 2.140244,
    "hits" : [
      {
        "_index" : "certdata-domaindetection",
        "_type" : "_doc",
        "_id" : "a1db3d40a4088d61c7526cc272fa1b82",
        "_score" : 2.140244,
        "_source" : {
          "beginAt" : "2022-10-19 18:06:47",
          "data" : {
            "ipv4" : [
              "139.159.241.37",
              "8.134.50.24",
              "47.103.24.173",
              "119.3.70.188",
              "120.92.78.97"
            ],
            "spentTime" : 41
          },
          "endAt" : "2022-10-19 18:08:22",
          "nodeID" : "domainDetection_1",
          "parentTaskID" : "1031",
          "storageAt" : "2022-10-19 18:08:23",
          "targetHost" : "bilibili.com",
          "taskID" : "1879969356"
        }
      }
    ]
  },
  "aggregations" : {
    "agg_targetHost" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "bilibili.com",
          "doc_count" : 2
        }
      ]
    },
    "termsCount" : {
      "value" : 1
    }
  }
}

go代码:


type AggStatKeyStrMap map[string]int64

// blockChainStatWithParamInput ...  入参
type blockChainStatWithParamInput struct {
    model.PublicChainType
    Status   model.BlockchainStatus
    Country  []string
    Province string
    City     string
    Page     int
    Size     int
}


/*
    按搜索条件聚合指定字段
    isStatusSearch 是否按状态搜索
    aggField 聚合的字段
*/

func (d *blockChainService) GetBlockChainStatWithParam(ctx context.Context, in blockChainStatWithParamInput, isStatusSearch bool, aggField string) (aggStatMap model.AggStatKeyStrMap, count int64, err error) {
    ctx, span := gtrace.NewSpan(ctx, "GetBlockChainStatWithParam")
    defer span.End()
    span.SetAttributes(
        attribute.String("GetBlockChainStatWithParam.AggField", aggField),
    )

    query := elasticCore.NewBoolQuery()

    if in.Type > 0 {
        query.Must(elasticCore.NewTermQuery("type", in.Type))
    }
    if isStatusSearch {
        query.Must(elasticCore.NewTermQuery("status", in.Status))
    }
    if len(in.Country) > 0 {
        tmpInter := make([]interface{}, 0)
        for _, v := range in.Country {
            tmpInter = append(tmpInter, v)
        }
        query.Must(elasticCore.NewTermsQuery("country", tmpInter...))
    }
    if len(in.Province) > 0 {
        query.Must(elasticCore.NewTermQuery("province", in.Province))
    }
    if len(in.City) > 0 {
        query.Must(elasticCore.NewTermQuery("city", in.City))
    }
    sourceQ, _ := query.Source()
    fofacore.PrintQuery(sourceQ)

    //该单位备案的其他域名
    pageIndex := 1
    if in.Page != 0 {
        pageIndex = in.Page
    }
    pageSize := 10
    if in.Size != 0 {
        pageSize = in.Size
    }
    ctgAgg := elasticCore.NewTermsAggregation().Field(aggField)
    source, _ := ctgAgg.Source()
    marshalSource, _ := json.Marshal(source)
    glog.Debugf(ctx, "GetBlockChainAddrStat source:%v \n", string(marshalSource))

    span.SetAttributes(
        attribute.String("ast.EsIndexName", string(ast.EsBlockchainNodedetect)),
    )

    // 获取去重的字段数量(通用)
    cardinalityAgg := elasticCore.NewCardinalityAggregation().
        Field(aggField).
        PrecisionThreshold(3000)
    //Rehash(true)

    //打印query语句
    sourceCardinalityAgg, _ := cardinalityAgg.Source()
    fofacore.PrintQuery(sourceCardinalityAgg)

    res, err := EsClient(string(ast.EsBlockchainNodedetect)).
        Query(query).
        From((pageIndex-1)*pageSize).
        Size(pageSize).
        Aggregation("ctg", ctgAgg).Pretty(true).
        Aggregation("cardinalityCount", cardinalityAgg).
        Do(context.Background())
    if err != nil {
        glog.Errorf(ctx, "[GetBlockChainAddrStat] es error: %s", err)
        return
    }

    // 去重后的总数
    term, _ := res.Aggregations.Cardinality("cardinalityCount")
    for _, bucket := range term.Aggregations {
        b, _ := bucket.MarshalJSON()
        t, _ := strconv.Atoi(string(b))
        count = int64(t)
    }
    if count == 0 {
        return
    }

    ctg, found := res.Aggregations.Terms("ctg")
    if !found {
        glog.Errorf(ctx, "[GetBlockChainAddrStat] es can not get res")
        return
    }
    if len(ctg.Buckets) == 0 {
        return
    }
    var aggStatKeyStrMap = make(model.AggStatKeyStrMap, 0)
    for _, item := range ctg.Buckets {
        aggStatKeyStrMap[item.Key.(string)] = item.DocCount
    }

    return aggStatKeyStrMap, count, nil
}

参考文档:
https://wenku.baidu.com/view/da561c28954bcf84b9d528ea81c758f5f61f291d.html?_wkts_=1667355955869&bdQuery=elasticsearch+bucket_sort

例二:聚合后将聚合结果进行分页的解决办法

数据源

{
    "took":2,
    "timed_out":false,
    "_shards":{
        "total":4,
        "successful":4,
        "skipped":0,
        "failed":0
    },
    "hits":{
        "total":{
            "value":1,
            "relation":"eq"
        },
        "max_score":6.5279574,
        "hits":[
            {
                "_index":"metadata",
                "_type":"_doc",
                "_id":"120.132.118.50:443",
                "_score":6.5279574,
                "_source":{
                    "id":"120.132.118.50:443",
                    "create_at":"2023-05-08 15:39:15",
                    "update_at":"2023-05-08 15:39:15",
                    "type":"metadata",
                    "asset_type":"",
                    "ip":"120.132.118.50",
                    "port":443,
                    "protocol":"https",
                    "base_protocol":"tcp",
                    "geoip":{
                        "isp":"",
                        "city":"",
                        "region":"",
                        "country":"CN",
                        "country_name":"中国",
                        "continent":"",
                        "timezone":"",
                        "longitude":113.722,
                        "latitude":34.7732
                    },
                    "host":"https://gcreat.panelook.com",
                    "hostnames":[
                        "https://gcreat.panelook.com"
                    ],
                    "domain":[
                        "panelook.com"
                    ],
                    "icp":"",
                    "subdomain":[
                        "panelook.com"
                    ],
                    "os":[
                        "windows"
                    ],
                    "server":"Apache/2.4.39 (Win64) OpenSSL/1.1.1b mod_fcgid/2.3.9a mod_log_rotate/1.02",
                    "title":"",
                    "header":"header test",
                    "banner":"",
                    "body":"",
                    "cert":"123123123",
                    "certs":{
                        "cert_date":"0",
                        "cert_num":0,
                        "domain":"",
                        "is_valid":true,
                        "issuer_cn":"",
                        "issuer_cns":[

                        ],
                        "issuer_org":"",
                        "not_after":"0",
                        "not_before":"0",
                        "sig_alth":"",
                        "sn":"",
                        "subject_cn":"",
                        "subject_cns":[

                        ],
                        "subject_org":"",
                        "valid_type":""
                    },
                    "critical":null,
                    "asn":{
                        "number":"56040",
                        "organization":"China Mobile communications corporation"
                    },
                    "products":[
                        {
                            "product":"mod_fcgid",
                            "level":0,
                            "category":"其他支撑系统",
                            "parent_category":"",
                            "softhard":"",
                            "company":"",
                            "version":""
                        },
                        {
                            "product":"OpenSSL",
                            "level":0,
                            "category":"数据证书",
                            "parent_category":"",
                            "softhard":"",
                            "company":"",
                            "version":""
                        },
                        {
                            "product":"Google-Analytics",
                            "level":0,
                            "category":"其他企业应用",
                            "parent_category":"",
                            "softhard":"",
                            "company":"",
                            "version":""
                        },
                        {
                            "product":"php",
                            "level":0,
                            "category":"脚本语言",
                            "parent_category":"",
                            "softhard":"",
                            "company":"",
                            "version":""
                        },
                        {
                            "product":"WIN64",
                            "level":0,
                            "category":"服务",
                            "parent_category":"",
                            "softhard":"",
                            "company":"",
                            "version":""
                        },
                        {
                            "product":"RapidSSL-TLS-DV-RSA-Mixed-SHA256-2020-CA",
                            "level":0,
                            "category":"其他企业应用",
                            "parent_category":"",
                            "softhard":"",
                            "company":"",
                            "version":""
                        },
                        {
                            "product":"APACHE-HTTP_Server",
                            "level":0,
                            "category":"服务",
                            "parent_category":"",
                            "softhard":"",
                            "company":"",
                            "version":""
                        },
                        {
                            "product":"RapidSSL-Cert",
                            "level":0,
                            "category":"数据证书",
                            "parent_category":"",
                            "softhard":"",
                            "company":"",
                            "version":""
                        },
                        {
                            "product":"digicert-Cert",
                            "level":0,
                            "category":"数据证书",
                            "parent_category":"",
                            "softhard":"",
                            "company":"",
                            "version":""
                        },
                        {
                            "product":"RapidSSL-TLS-DV-RSA-Mixed-SHA256-2020-CA-1",
                            "level":0,
                            "category":"其他企业应用",
                            "parent_category":"",
                            "softhard":"",
                            "company":"",
                            "version":""
                        }
                    ],
                    "dbs":null,
                    "favicon":{
                        "base64":"",
                        "hash":"",
                        "url":""
                    },
                    "vulnerabilities":null
                }
            }
        ]
    },
    "aggregations":{
        "host":{
            "doc_count_error_upper_bound":0,
            "sum_other_doc_count":0,
            "buckets":[
                {
                    "key":"https://gcreat.panelook.com",
                    "doc_count":1
                }
            ]
        }
    }
}

DSL

GET metadata/_search
{
  "size": 0,
  "query": {
    "constant_score": {
      "filter": {
        "bool": {
          "must": [
            {
              "range": {
                "update_at": {
                  "from": "now-5y",
                  "include_lower": true,
                  "include_upper": true,
                  "to": null
                }
              }
            },
            {
              "nested": {
                "path": "vulnerabilities",
                "query": {
                  "bool": {
                    "must": {
                      "exists": {
                        "field": "vulnerabilities"
                      }
                    }
                  }
                }
              }
            }
          ]
        }
      },
      "boost": 1.2
    }
  },
  "aggs": {
    "vGroup": {
      "nested": {
        "path": "products"
      },
      "aggs": {
        "filterField": {
          "filter": {
            "bool": {
              "must_not": [
                {
                  "term": {
                    "products.proudct": {
                      "value": ""
                    }
                  }
                }
              ]
            }
          },
          "aggs": {
            "product": {
              "terms": {
                "field": "products.product",
                "order": [
                  {
                    "_count": "desc"
                  }
                ],
                "size": 99999
              },
              "aggs": {
                "myBucketSort": {
                  "bucket_sort": {
                    "from": 0,
                    "size": 10,
                    "gap_policy": "skip"
                  }
                }
              }
            }
          }
        },
        "cardinalityCount": {
          "cardinality": {
            "field": "products.product",
            "precision_threshold": 30000
          }
        }
      }
    }
  }
}

结果:

{
  "took" : 2,
  "timed_out" : false,
  "_shards" : {
    "total" : 4,
    "successful" : 4,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 10,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "vGroup" : {
      "meta" : { },
      "doc_count" : 13,
      "filterField" : {
        "meta" : { },
        "doc_count" : 13,
        "product" : {
          "doc_count_error_upper_bound" : 0,
          "sum_other_doc_count" : 0,
          "buckets" : [
            {
              "key" : "Alt-Svc",
              "doc_count" : 1
            },
            {
              "key" : "CLOUDFLARE-产品",
              "doc_count" : 1
            },
            {
              "key" : "Eggdig",
              "doc_count" : 1
            },
            {
              "key" : "Fathercook",
              "doc_count" : 1
            },
            {
              "key" : "Horrorhas",
              "doc_count" : 1
            },
            {
              "key" : "Hostwill",
              "doc_count" : 1
            },
            {
              "key" : "Llamadoes",
              "doc_count" : 1
            },
            {
              "key" : "Microsoft-ASP",
              "doc_count" : 1
            },
            {
              "key" : "Microsoft-ASP.NET",
              "doc_count" : 1
            },
            {
              "key" : "Milkdream",
              "doc_count" : 1
            }
          ]
        }
      },
      "cardinalityCount" : {
        "value" : 13
      }
    }
  }
}

子聚合

例子:

POST test/_search
{
  "size": 0,
  "aggs": {
    "agg_country_protocol_type": {
      "terms": {
        "field": "geoip.country_name.raw",
        "size": 999999,
        "order": {
          "_key": "asc"
        }
      },
      "aggs": {
        "protocol_agg": {
          "terms": {
            "field": "protocol.raw",
            "size": 999999,
            "order": {
              "_key": "asc"
            }
          }
        }
      }
    },
    "agg_compony_type": {
      "terms": {
        "field": "company.keyword",
        "size": 999999,
        "order": {
          "_key": "asc"
        }
      },
      "aggs": {
        "agg_compony_protocol": {
          "terms": {
            "field": "protocol.raw",
            "size": 999999,
            "order": {
              "_key": "asc"
            }
          }
        }
      }
    }
  }
}

获取某分组条件下所有桶的指定度量的和

https://www.cnblogs.com/nysd/p/12858355.html

ElasticSearch 聚合查询(求和、最大值、最小值、平均值、去重、百分比、占比、中位数、topN、分组聚合)

https://blog.csdn.net/feizuiku0116/article/details/123141352

搜索country_code 为中国的

{
    "bool":{
        "should":{
            "terms":{
                "geoip.country_code2.raw":[
                    "CN",
                    "TW",
                    "HK",
                    "MO"
                ]
            }
        }
    }
}
作者:海马  创建时间:2023-03-19 11:09
最后编辑:admin  更新时间:2024-05-10 15:32