把 Elasticsearch 当数据库使

wymv5244 8年前

来自: https://segmentfault.com/a/1190000004454399

今天需要做一些最简单的聚合查询

COUNT(*)

SQL

$ cat << EOF | ./es_query.py http://127.0.0.1:9200  select count(*) from quote                                             EOF
{"count(*)": 20994400}

Elasticsearch

{    "aggs": {},     "size": 0  }
{    "hits": {      "hits": [],       "total": 20994400,       "max_score": 0.0    },     "_shards": {      "successful": 1,       "failed": 0,       "total": 1    },     "took": 26,     "timed_out": false  }

这个就不算聚合,只是看了一下最终满足过滤条件的 total hits count。

COUNT(ipo_year)

这个和 COUNT(*) 的区别是 COUNT(ipo_year) 要求字段必须有值才算一个。

$ cat << EOF | ./es_query.py http://127.0.0.1:9200  select count(ipo_year) from symbol  EOF
{"count(ipo_year)": 2898}

Elasticsearch

{    "aggs": {      "count(ipo_year)": {        "value_count": {          "field": "ipo_year"        }      }    },     "size": 0  }
{    "hits": {      "hits": [],       "total": 6714,       "max_score": 0.0    },     "_shards": {      "successful": 1,       "failed": 0,       "total": 1    },     "took": 55,     "aggregations": {      "count(ipo_year)": {        "value": 2898      }    },     "timed_out": false  }

Profile

[    {      "query": [        {          "query_type": "MatchAllDocsQuery",          "lucene": "*:*",          "time": "0.3204170000ms",          "breakdown": {            "score": 0,            "create_weight": 10688,            "next_doc": 278660,            "match": 0,            "build_scorer": 31069,            "advance": 0          }        }      ],      "rewrite_time": 2279,      "collector": [        {          "name": "MultiCollector",          "reason": "search_multi",          "time": "2.957183000ms",          "children": [            {              "name": "TotalHitCountCollector",              "reason": "search_count",              "time": "0.2319240000ms"            },            {              "name": "ValueCountAggregator: [count(ipo_year)]",              "reason": "aggregation",              "time": "1.999916000ms"            }          ]        }      ]    }  ]

这是我们的第一个聚合例子。可以从profile结果看出来,其实现方式在采集文档的时候加上了ValueCountAggregator统计了字段非空的文档数量。

COUNT(DISTINCT ipo_year)

SQL

$ cat << EOF | ./es_query.py http://127.0.0.1:9200  select count(distinct ipo_year) from symbol  EOF
{"count(distinct ipo_year)": 39}

Elasticsearch

{    "aggs": {      "count(distinct ipo_year)": {        "cardinality": {          "field": "ipo_year"        }      }    },     "size": 0  }
{    "hits": {      "hits": [],       "total": 6714,       "max_score": 0.0    },     "_shards": {      "successful": 1,       "failed": 0,       "total": 1    },     "took": 24,     "aggregations": {      "count(distinct ipo_year)": {        "value": 39      }    },     "timed_out": false  }

Profile

[    {      "query": [        {          "query_type": "MatchAllDocsQuery",          "lucene": "*:*",          "time": "0.2033600000ms",          "breakdown": {            "score": 0,            "create_weight": 7501,            "next_doc": 162905,            "match": 0,            "build_scorer": 32954,            "advance": 0          }        }      ],      "rewrite_time": 2300,      "collector": [        {          "name": "MultiCollector",          "reason": "search_multi",          "time": "2.438386000ms",          "children": [            {              "name": "TotalHitCountCollector",              "reason": "search_count",              "time": "0.2240230000ms"            },            {              "name": "CardinalityAggregator: [count(distinct ipo_year)]",              "reason": "aggregation",              "time": "1.471620000ms"            }          ]        }      ]    }  ]

这个例子里 ValueCountAggregator 变成了 CardinalityAggregator

SUM(market_cap)

MIN/MAX/AVG/SUM 这几个简单的聚合也是支持的

$ cat << EOF | ./es_query.py http://127.0.0.1:9200  select sum(market_cap) from symbol           EOF
{"sum(market_cap)": 11454155180142.0}

Elasticsearch

{    "aggs": {      "sum(market_cap)": {        "sum": {          "field": "market_cap"        }      }    },     "size": 0  }
{    "hits": {      "hits": [],       "total": 6714,       "max_score": 0.0    },     "_shards": {      "successful": 1,       "failed": 0,       "total": 1    },     "took": 15,     "aggregations": {      "sum(market_cap)": {        "value": 11454155180142.0      }    },     "timed_out": false  }

Profile

[    {      "query": [        {          "query_type": "MatchAllDocsQuery",          "lucene": "*:*",          "time": "0.2026870000ms",          "breakdown": {            "score": 0,            "create_weight": 8097,            "next_doc": 163069,            "match": 0,            "build_scorer": 31521,            "advance": 0          }        }      ],      "rewrite_time": 2151,      "collector": [        {          "name": "MultiCollector",          "reason": "search_multi",          "time": "2.461247000ms",          "children": [            {              "name": "TotalHitCountCollector",              "reason": "search_count",              "time": "0.3302140000ms"            },            {              "name": "SumAggregator: [sum(market_cap)]",              "reason": "aggregation",              "time": "1.102363000ms"            }          ]        }      ]    }  ]

过滤 + 聚合

SQL

$ cat << EOF | ./es_query.py http://127.0.0.1:9200  select sum(market_cap) from symbol where ipo_year=1998  EOF
{"sum(market_cap)": 107049150786.0}

Elasticsearch

{    "query": {      "term": {        "ipo_year": 1998      }    },     "aggs": {      "sum(market_cap)": {        "sum": {          "field": "market_cap"        }      }    },     "size": 0  }
{    "hits": {      "hits": [],       "total": 56,       "max_score": 0.0    },     "_shards": {      "successful": 1,       "failed": 0,       "total": 1    },     "took": 11,     "aggregations": {      "sum(market_cap)": {        "value": 107049150786.0      }    },     "timed_out": false  }

Profile

[    {      "query": [        {          "query_type": "TermQuery",          "lucene": "ipo_year:`N",          "time": "0.4526400000ms",          "breakdown": {            "score": 0,            "create_weight": 220579,            "next_doc": 159412,            "match": 0,            "build_scorer": 72649,            "advance": 0          }        }      ],      "rewrite_time": 3750,      "collector": [        {          "name": "MultiCollector",          "reason": "search_multi",          "time": "0.2203470000ms",          "children": [            {              "name": "TotalHitCountCollector",              "reason": "search_count",              "time": "0.009478000000ms"            },            {              "name": "SumAggregator: [sum(market_cap)]",              "reason": "aggregation",              "time": "0.1557820000ms"            }          ]        }      ]    }  ]

query 过滤完,然后再计算 aggs