把 Elasticsearch 当数据库使:聚合后排序

叶小叶 5年前

来自: https://segmentfault.com/a/1190000004462048

使用 https://github.com/taowen/es-monitor 可以用 SQL 进行 elasticsearch 的查询。有的时候分桶聚合之后会产生很多的桶,我们只对其中部分的桶关心。最简单的办法就是排序之后然后取前几位的结果。

ORDER BY _term

SQL

$ cat << EOF | ./es_query.py http://127.0.0.1:9200  SELECT ipo_year, COUNT(*) FROM symbol GROUP BY ipo_year ORDER BY ipo_year LIMIT 2  EOF
{"COUNT(*)": 4, "ipo_year": 1972}  {"COUNT(*)": 1, "ipo_year": 1973}

Elasticsearch

{    "aggs": {      "ipo_year": {        "terms": {          "field": "ipo_year",           "order": [            {              "_term": "asc"            }          ],           "size": 2        },         "aggs": {}      }    },     "size": 0  }

因为 ipo_year 是 GROUP BY 的字段,所以按这个排序用_term指代。

{    "hits": {      "hits": [],       "total": 6714,       "max_score": 0.0    },     "_shards": {      "successful": 1,       "failed": 0,       "total": 1    },     "took": 3,     "aggregations": {      "ipo_year": {        "buckets": [          {            "key": 1972,             "doc_count": 4          },           {            "key": 1973,             "doc_count": 1          }        ],         "sum_other_doc_count": 2893,         "doc_count_error_upper_bound": 0      }    },     "timed_out": false  }

ORDER BY _count

SQL

$ cat << EOF | ./es_query.py http://127.0.0.1:9200  SELECT ipo_year, COUNT(*) AS ipo_count FROM symbol GROUP BY ipo_year ORDER BY ipo_count LIMIT 2  EOF
{"ipo_count": 1, "ipo_year": 1973}  {"ipo_count": 2, "ipo_year": 1980}

Elasticsearch

{    "aggs": {      "ipo_year": {        "terms": {          "field": "ipo_year",           "order": [            {              "_count": "asc"            }          ],           "size": 2        },         "aggs": {}      }    },     "size": 0  }
{    "hits": {      "hits": [],       "total": 6714,       "max_score": 0.0    },     "_shards": {      "successful": 1,       "failed": 0,       "total": 1    },     "took": 2,     "aggregations": {      "ipo_year": {        "buckets": [          {            "key": 1973,             "doc_count": 1          },           {            "key": 1980,             "doc_count": 2          }        ],         "sum_other_doc_count": 2895,         "doc_count_error_upper_bound": -1      }    },     "timed_out": false  }

ORDER BY 指标

SQL

$ cat << EOF | ./es_query.py http://127.0.0.1:9200      SELECT ipo_year, MAX(market_cap) AS max_market_cap FROM symbol       GROUP BY ipo_year ORDER BY max_market_cap LIMIT 2  EOF
{"max_market_cap": 826830000.0, "ipo_year": 1982}  {"max_market_cap": 847180000.0, "ipo_year": 2016}

Elasticsearch

{    "aggs": {      "ipo_year": {        "terms": {          "field": "ipo_year",           "order": [            {              "max_market_cap": "asc"            }          ],           "size": 2        },         "aggs": {          "max_market_cap": {            "max": {              "field": "market_cap"            }          }        }      }    },     "size": 0  }
{    "hits": {      "hits": [],       "total": 6714,       "max_score": 0.0    },     "_shards": {      "successful": 1,       "failed": 0,       "total": 1    },     "took": 20,     "aggregations": {      "ipo_year": {        "buckets": [          {            "max_market_cap": {              "value": 826830000.0            },             "key": 1982,             "doc_count": 4          },           {            "max_market_cap": {              "value": 847180000.0            },             "key": 2016,             "doc_count": 6          }        ],         "sum_other_doc_count": 2888,         "doc_count_error_upper_bound": -1      }    },     "timed_out": false  }

HISTOGRAM 和 ORDER BY

除了 terms aggregation,其他 aggregation 也支持 order by 但是并不完善。比如 histogram aggregation 支持 sort 但是并不支持 size (也就是可以ORDER BY 但是不能 LIMIT)。官方有计划增加一个通用的支持 LIMIT 的方式,不过还没有实现: https://github.com/elastic/elasticsearch/issues/14928

SQL

$ cat << EOF | ./es_query.py http://127.0.0.1:9200      SELECT ipo_year_range, MAX(market_cap) AS max_market_cap FROM symbol       GROUP BY histogram(ipo_year, 10) AS ipo_year_range ORDER BY ipo_year_range   EOF
{"ipo_year_range": 1970, "max_market_cap": 18370000000.0}  {"ipo_year_range": 1980, "max_market_cap": 522690000000.0}  {"ipo_year_range": 1990, "max_market_cap": 230940000000.0}  {"ipo_year_range": 2000, "max_market_cap": 470490000000.0}  {"ipo_year_range": 2010, "max_market_cap": 287470000000.0}

Elasticsearch

{    "aggs": {      "ipo_year_range": {        "aggs": {          "max_market_cap": {            "max": {              "field": "market_cap"            }          }        },         "histogram": {          "field": "ipo_year",           "interval": 10,           "order": {            "_key": "asc"          }        }      }    },     "size": 0  }
{    "hits": {      "hits": [],       "total": 6714,       "max_score": 0.0    },     "_shards": {      "successful": 1,       "failed": 0,       "total": 1    },     "took": 2,     "aggregations": {      "ipo_year_range": {        "buckets": [          {            "max_market_cap": {              "value": 18370000000.0            },             "key": 1970,             "doc_count": 5          },           {            "max_market_cap": {              "value": 522690000000.0            },             "key": 1980,             "doc_count": 155          },           {            "max_market_cap": {              "value": 230940000000.0            },             "key": 1990,             "doc_count": 598          },           {            "max_market_cap": {              "value": 470490000000.0            },             "key": 2000,             "doc_count": 745          },           {            "max_market_cap": {              "value": 287470000000.0            },             "key": 2010,             "doc_count": 1395          }        ]      }    },     "timed_out": false  }
</div>