ELK之elastic 查询

Elasticsearch(下文简称 ES)是分布式搜索和分析引擎,可以极大提高搜索效率。官方客户端在Java、.NET(C#)、PHP、Python、Apache Groovy、Ruby和许多其他语言中都是可用的。ES 是基于 Lucene 的全文检索引擎,它会对数据进行分词后保存索引,擅长管理大量的索引数据...

elastic 之所以如此快(甚至比mysql主键查询还要快)是因为使用了倒排索引,简单说是由属性值来确定记录的位置。Elasticsearch 还做了许多针对性的优化:当我们对两个字段进行检索时,就可以利用 Bitmap(有值为1,无值为0)进行优化,and 查询直接通过位与计算便可得出结果。

Name Posting List
name=zhangsan [1,3,5]
age=18 [1,2,4,5]
[1, 3, 5] ⇒ 10101 
 
[1, 2, 4, 5] ⇒ 11011 
  • 这样两个二进制数组求与便可得出结果:
10001 ⇒ [1, 5] 

查询

should 、must、must_not… 同级

不为空 至少满足其中一个

  • minimum_should_match:至少满足n项
  • should:应该
    • exists:不为空
POST rpt_nginx_log/_search
{
  "query": {
    "bool": {
      "minimum_should_match": 1, 	
      "should": [
        {
          "exists": { "field": "uid"}    
        },
        {
          "exists": {"field": "anoyu"}
        }
      ]
    }
  },
  "sort": {
		"lastupdate": {
			"order": "desc"
		}
 },
  "_source": ["log_ip", "os",   "userinfo.name"  ]
}

#should 、must嵌套
"query": {
	"bool": {
		"must": [
			{
				"bool": {
					"should": [
						{"match": {"look_user": "test1"}},
						{"match": {"userId": "test2"}}
					]
				}
			},
			{"term": {"status": 1}},
			{"match": {"verify": 2}}
		]
	}
}
  • must
    • match
    • range
#match
POST rpt_nginx_log/_search
{
    "query": {
        "bool": {
            "must": [
                {
                    "match": {
                        "user_url": {
                            "query": "国最高",  
                            "operator": "and"    /*分析关系*/
                        }
                    }
                },
                {
                     "terms": {
                         "uid" : [18587, 29004]
                      }
                }
            ]
        }
    }
}

# range
POST rpt_nginx_log/_search
{
    "query": {
        "bool": {
            "must": [
                {
                    "range": {
                      "log_time": {
                            "gte": "2021-01-03 22:11:00",
                            "lte": "2021-01-03 22:12:00"
                        }
                    }
                }
            ]
        }
    }
}

精确查询

{
	"query": {
		"match_phrase": {
			"user_url": "https://www.iphouse.cn/groupList.html"
		}
	}
}

模糊查询

#模糊查询
{
    "query": {
        "bool": {
            "must": [
                {
                    "wildcard": {
                        "imgae": {
                            "value": "*https*"                          
                        }
                    }
                }
            ]
        }
    },
    "from":0,
    "size":100,
    "track_total_hits": true
}

排序

### 普通排序
{
	"query": {
		"match_all": {}
	},
  	"sort": {
		"lastupdate": {
			"order": "desc"
		}
    }
}

### 根据字段长度排序
{
	"query": {
		"match_all": {}
	},
	"size": 100,
	"sort": {
		"_script": {
			"script": {
				"source": "doc['name.keyword'].size()>0 ?doc['name.keyword'].value.length():0"
			},
			"type": "number",
			"order": "desc"
		}
	}
}

长度筛选

{
  "query": {
    "script": {
      "script": "doc['ipc'].size() ==0"
    }
  },
  "from": 15,
  "_source": [
    "ipc"
  ]
}

聚合查询

查询后二次处理

# aggs
# res_name:结果名字
# terms:类型分组\统计\平均值等(terms|stats|avg...)
# field: "字段"
POST rpt_nginx_log/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "exists": {
            "field": "uid"
          }
        }
      ]
    }
  },
  "aggs": {
    "res_name": {
      "terms": {
          "field": "uid",
           "size":30
        }
    }
  },
  "_source": [
    "userinfo.nickname",
    "userinfo.vip",
    "uid"
  ]
}






##################    terms
"aggs": {
		"group_location": {
			"terms": {
			"script": {
				"source": "  if(ctx._source.initial_url instanceof String && ctx._source.initial_url.length()>2 ){   String s=ctx._source.initial_url;        ArrayList array=new ArrayList();    if(!s.isEmpty()){        String splitter = '/|?';        StringTokenizer tokenValue = new StringTokenizer(s, splitter);        while (tokenValue.hasMoreTokens()) {        array.add(tokenValue.nextToken());        }        }        ctx._source.module_name=array[0];        }       "
				},
				"size": 10000
			}
		}
	}

	"aggs": {
		"top_tags": {
			"terms": {
				"field": "module_name",
				"size": 100
			},
			"aggs": {				###自定义键名detail:补充详情
				"detail": {
					"top_hits": {
						"_source": {
							"includes": [
								"user_url"
							]
						},
						"size": 1
					}
				}
			}
		}
	}
##################
"aggs": {
	"group_location": {
			"terms": {
				"script": {
					"source": " if( doc['initial_url.keyword'].size()>0 ){ def  _base_url = doc['initial_url.keyword'].value.substring(1); def _end_pointer =   _base_url.indexOf('/');  if(_end_pointer>0){ return  _base_url.substring(0,_end_pointer);} }  "
				},
				"size": 10000
			},
			"aggs": {
				"data_time": {
                    "date_histogram": {
                        "extended_bounds": {		#自动补全
                            "min": 1609170600000,
                            "max": 1609517110000
                         },
                        "time_zone": "GMT+8",		##时区+8
                        "field": "log_time",
                        "interval": "1M",			#步进 1y|1M|1d
                        "format": "yyyy-MM-dd",
                        "order": {
                          "_term": "desc"    
                        }
                    },
                    "aggs": {
                        "detail":{				###自定义键名detail:补充详情
                            "top_hits":{
                                "size":1 ,
                                "_source":{
                                    "includes":[
                                        "userinfo.login_phone"
                                    ]
                                }
                            }
                        }
                    }
				}
			}
		}
	}
	
	#####aggs    value_count聚合count   stats
	"aggs": {
		"hat_prices": {
			"value_count": {
				"field": "uid"
			}
		}
	}
	
	###### 多字段聚合(逗号拼接)
	"aggs": {
		"all_interests": {
			"terms": {
				"script": {
					"source": "doc['susongshenjiid'].value  +','+ doc['wenshuleixingid'].value +','+ doc['anjianxingzhiid'].value"
				}
			}
		}
	}

nested判空

{
  "query": {
    "nested": {
      "path": "COMPSCORELIST",
      "query": {
        "bool": {
          "must": {
            "exists": {
              "field": "COMPSCORELIST.FIELD"
            }
          }
        }
      }
    }
  }
}

分词器测试

GET /my_index/_analyze
{
	"text":"飞科剃须刀",
	"analyzer":"standard"
}
###分词结果:飞、科、剃、须、刀



GET /my_index/_analyze
{
	"text":"飞科剃须刀",
	"analyzer":"ik_max_word"
}
###分词结果:飞、科、剃须刀、剃须、刀



GET /my_index/_analyze
{
	"text":"飞科剃须刀",
	"analyzer":"ik_smart"
}
###分词结果:飞、科、剃须刀

其他

# 与query 同级 获取真实总条数
"track_total_hits": true,

###
"bool": {
			"must_not": [
				{
					"exists": {
						"field": "mothod_name"
					}
				}
			]
		}
###
{
  "query": {
    "match_all": {}
    }
}
Licensed under 京ICP备17003353号-3