Python elasticsearch_dsl module: Q usage examples

We extracted the following 45 code examples from open-source Python projects to illustrate how to use elasticsearch_dsl.Q.
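
As a quick orientation before the examples: Q builds a single query clause from keyword arguments or a raw dict, and Q objects compose with the operators &, | and ~. A minimal, hedged sketch with invented field names:

from elasticsearch_dsl import Q

# A single clause from keyword arguments: {'match': {'title': 'python'}}
q = Q('match', title='python')

# Operators combine clauses into bool queries:
#   & -> must, | -> should, ~ -> must_not
combined = Q('match', title='python') & ~Q('term', status='draft')
print(combined.to_dict())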

Project: Stream4Flow    Author: CSIRT-MU
def get_summary_statistics():
    """
    Obtains statistics about the current sums of flows, packets, and bytes.

    :return: JSON with status "ok" or "error" and requested data.
    """

    try:
        # Elastic query
        client = elasticsearch.Elasticsearch([{'host': myconf.get('consumer.hostname'), 'port': myconf.get('consumer.port')}])
        elastic_bool = []
        elastic_bool.append({'range': {'@timestamp': {'gte': "now-5m", 'lte': "now"}}})
        elastic_bool.append({'term': {'@type': 'protocols_statistics'}})

        qx = Q({'bool': {'must': elastic_bool}})
        s = Search(using=client, index='_all').query(qx)
        s.aggs.bucket('sum_of_flows', 'sum', field='flows')
        s.aggs.bucket('sum_of_packets', 'sum', field='packets')
        s.aggs.bucket('sum_of_bytes', 'sum', field='bytes')
        s = s.sort('@timestamp')  # sort() returns a clone, so reassign
        result = s.execute()

        # Result parsing into CSV in format: timestamp, flows, packets, bytes
        data = "Timestamp, Flows, Packets, Bytes;"
        timestamp = "Last 5 Minutes"
        data += timestamp + ', ' +\
                str(int(result.aggregations.sum_of_flows['value'])) + ', ' +\
                str(int(result.aggregations.sum_of_packets['value'])) + ', ' +\
                str(int(result.aggregations.sum_of_bytes['value']))

        json_response = '{"status": "Ok", "data": "' + data + '"}'
        return json_response

    except Exception as e:
        json_response = '{"status": "Error", "data": "Elasticsearch query exception: ' + escape(str(e)) + '"}'
        return json_response
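
Note the Q({'bool': ...}) call above: Q also accepts a raw query dict, equivalent to the keyword-argument form. A small sketch of the equivalence:

from elasticsearch_dsl import Q

raw = Q({'bool': {'must': [{'term': {'@type': 'protocols_statistics'}}]}})
kw = Q('bool', must=[Q('term', **{'@type': 'protocols_statistics'})])
assert raw.to_dict() == kw.to_dict()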
Project: mordecai    Author: openeventdata
def query_geonames_country(self, placename, country):
        """
        Like query_geonames, but this time limited to a specified country.
        """
        # first, try for an exact phrase match
        q = {"multi_match": {"query": placename,
                             "fields": ['name^5', 'asciiname^5', 'alternativenames'],
                             "type": "phrase"}}
        r = Q("match", country_code3=country)
        res = self.conn.query(q).query(r)[0:50].execute()

        # if no results, use some fuzziness, but still require all terms to be present.
        # Fuzzy is not allowed in "phrase" searches.
        if res.hits.total == 0:
            # tried wrapping this in a {"constant_score": {"query": ... but made it worse
            q = {"multi_match": {"query": placename,
                                 "fields": ['name', 'asciiname', 'alternativenames'],
                                 "fuzziness": 1,
                                 "operator": "and"}}
            r = Q("match", country_code3=country)
            res = self.conn.query(q).query(r)[0:50].execute()

        out = utilities.structure_results(res)
        return out
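
Chaining .query() twice, as above, ANDs the two queries into a single bool query. A hedged sketch of the resulting request body, with invented values:

from elasticsearch_dsl import Search, Q

s = Search().query(Q('match', name='Berlin')).query(Q('match', country_code3='DEU'))
# Roughly: {'query': {'bool': {'must': [{'match': {'name': 'Berlin'}},
#                                       {'match': {'country_code3': 'DEU'}}]}}}
print(s.to_dict())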
Project: WASE    Author: thomaspatzke
def query_missing(s, field, name, methods=None, responsecodes=None, invert=False):
    # main query
    q = Q("match", ** { field: name })
    if not invert:
        q = ~q
    s.query = q

    # add filters
    ## method
    if methods:
        s = s.filter("terms", ** { 'request.method': methods })
    ## response codes
    if responsecodes:
        for rc in responsecodes:
            rcrange = rc.split("-")
            if len(rcrange) == 2:
                s = s.filter("range", ** { 'response.status': { "gte": int(rcrange[0]), "lte": int(rcrange[1]) } })
            else:
                s = s.filter("term", ** { 'response.status': rc })

    print_debug(s.to_dict())
    return s
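
The ~q inversion above wraps the query in a bool must_not clause; a minimal sketch:

from elasticsearch_dsl import Q

q = Q('match', **{'request.url': 'example'})
print((~q).to_dict())
# {'bool': {'must_not': [{'match': {'request.url': 'example'}}]}}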
Project: WASE    Author: thomaspatzke
def query_vals(s, field, name, values, invert):
    # match documents whose nested field value matches the given wildcard pattern, if one was supplied
    if values:
        q = Q("nested", path=field, query=Q("wildcard", ** { field + ".value.keyword": values }))
        if invert:
            s.query = ~q
        else:
            s.query = q
    else:
        s.query = Q()

    # 1. descend into response.headers/request.parameters
    # 2. filter given header
    # 3. aggregate values
    # 4. jump back into main document
    # 5. aggregate URLs
    s.aggs.bucket("field", "nested", path=field)\
            .bucket("valuefilter", "filter", Q("match", ** { field + ".name": name }))\
            .bucket("values", "terms", field=field + ".value.keyword", size=args.size)\
            .bucket("main", "reverse_nested")\
            .bucket("urls", "terms", field="request.url.keyword", size=args.size)
    return s
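
Reading the nested aggregation back out of the response follows the bucket names defined above. A hedged usage sketch, assuming the Search object s returned by query_vals and a live cluster:

response = s.execute()
for value_bucket in response.aggregations.field.valuefilter.values.buckets:
    print(value_bucket.key, value_bucket.doc_count)
    for url_bucket in value_bucket.main.urls.buckets:
        print('    ' + url_bucket.key)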
Project: open-ledger    Author: creativecommons
def about(request):
    """Information about the current site, its goals, and what content is loaded"""
    # Provider counts
    providers = cache.get_or_set(CACHE_STATS_NAME, [], CACHE_STATS_DURATION)
    if not providers:
        for provider in sorted(settings.PROVIDERS.keys()):
            s = Search()
            q = Q('term', provider=provider)
            s = s.query(q)
            response = s.execute()
            if response.hits.total > 0:
                data = settings.PROVIDERS[provider]
                total = intcomma(response.hits.total)
                data.update({'hits': total})
                providers.append(data)
        # All results
        s = Search()
        response = s.execute()
        total = intcomma(response.hits.total)
        providers.append({'display_name': 'Total', 'hits': total})
        cache.set(CACHE_STATS_NAME, providers)
    return render(request, "about.html", {'providers': providers})
Project: open-ledger    Author: creativecommons
def correct_orphan_records(self, provider='europeana', end=None):
        """[#185] Delete records from the search engine which aren't found in the database"""
        s = Search()
        q = Q('term', provider=provider)
        s = s.query(q)
        response = s.execute()
        total = response.hits.total
        # A file extracted from the production database listing all of the europeana identifiers
        identifier_file = '/tmp/europeana-identifiers.json'
        db_identifiers = set(json.load(open(identifier_file)))
        total_in_db = len(db_identifiers)
        log.info("Using search engine instance %s", settings.ELASTICSEARCH_URL)
        log.info("Total records: %d (search engine), %d (database) [diff=%d]", total, total_in_db, total - total_in_db)
        deleted_count = 0
        for r in s.scan():
            if r.identifier not in db_identifiers:
                img = search.Image.get(id=r.identifier)
                log.debug("Going to delete image %s", img)
                deleted_count += 1
        log.info("Deleted %d from search engine", deleted_count)
Project: photo-manager    Author: karih
def aggregate(self, search):
        """
        Add aggregations representing the facets selected, including potential
        filters.
        """
        for f, facet in iteritems(self.facets):
            agg = facet.get_aggregation()
            agg_filter = esd.Q('match_all')
            for field, filter in iteritems(self._filters):
                if f == field or (f.startswith("date") and field.startswith("date")):
                    continue
                agg_filter &= filter
            search.aggs.bucket(
                '_filter_' + f,
                'filter',
                filter=agg_filter
            ).bucket(f, agg)
Project: photo-manager    Author: karih
def build(self):
        fs = self._clone()

        for facet in self.facets:
            if "include_%s" % facet.name not in self.args:
                continue

            agg_filter = esd.Q("match_all")
            for inner in self.facets:
                if inner.name != facet.name:
                    if inner.is_filtered(self.args):
                        agg_filter &= inner.filters(self.args)

            for agg_name, agg in facet.aggregates():
                fs.aggs.bucket("_filter_" + agg_name, "filter", filter=agg_filter).bucket(agg_name, agg)

        post_filter = esd.Q('match_all')
        for facet in self.facets:
            if facet.is_filtered(self.args):
                post_filter &= facet.filters(self.args)
        fs.post_filter._proxied &= post_filter

        return fs
Project: fccforensics    Author: RagtagOpen
def tag_by_email(self, emails, breached):
        docs = []
        s = Search(using=self.es).\
            filter(Q({'terms': {'contact_email.keyword': emails}})).\
            source(['id_submission'])
        print('%s emails breached=%s' % (len(emails), breached))
        for hit in s.scan():
            docs.append(lib.bulk_update_doc(hit['id_submission'], {'breached': breached}))
            if not len(docs) % 500:
                print('\tfetched %s' % len(docs))
        print('\t%s matches' % len(docs))
        return docs
Project: fccforensics    Author: RagtagOpen
def run(self):
        emails = {
            'breached': set(),
            'unbreached': set(),
        }
        # contact_email exists
        must = [Q('exists', field='contact_email')]
        # matches source if specified
        if self.source:
            must.append(Q({'term': {'analysis.source': self.source}}))
        # not already tagged with breached
        s = Search(using=self.es).\
            query(FunctionScore(
                  query=Q('bool',
                          must=must,
                          must_not=[Q('exists', field='analysis.breached')]),
                  functions=[SF('random_score', seed=int(time.time()))]
            )).\
            source(['contact_email'])
        print('%s breached: source=%s limit=%s' % (datetime.now().isoformat(), self.source,
            self.limit))
        print('query=\n%s' % json.dumps(s.to_dict()))
        for filing in s[:self.limit]:
            email = filing['contact_email']
            if not email or email in emails['breached'] or email in emails['unbreached']:
                continue
            breached = self.is_breached(email)
            emails['breached' if breached else 'unbreached'].add(email)
        docs = []
        print('done source=%s' % self.source)
        if emails['breached']:
            docs += self.tag_by_email(list(emails['breached']), True)
        if emails['unbreached']:
            docs += self.tag_by_email(list(emails['unbreached']), False)
        try:
            lib.bulk_update(self.es, docs)
        except Exception as e:
            print('error indexing: %s' % e)
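
The FunctionScore plus random_score pattern in run() above is a common way to process matching documents in random order. A minimal standalone sketch, assuming a local cluster and the field names above:

from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search, Q, SF
from elasticsearch_dsl.query import FunctionScore

es = Elasticsearch()  # assumed local cluster
s = Search(using=es).query(FunctionScore(
    query=Q('exists', field='contact_email'),
    functions=[SF('random_score', seed=42)],
))
for hit in s[:10]:  # roughly a random sample of 10 matching docs
    print(hit.meta.id)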
Project: defplorex    Author: trendmicro
def monitor(index, delta, query_string):
    click.clear()

    def cnt():
        q = Q('query_string', query=query_string)
        s = Search(
                using=es.client,
                index=index).query(q)
        return s.count()

    N = cnt()
    tot = Search(using=es.client, index=index).count()

    if not delta:
        N = tot

    log.info('Processing %d records (total: %d)', N, tot)

    click.echo('You can exit by CTRL-C: results will still process')

    bar = SlowOverallFancyBar('', max=N, grand_total=tot)
    while True:
        time.sleep(5.0)
        try:
            n = cnt()
            if isinstance(n, int):
                if delta:
                    done = N - n
                else:
                    done = n
                bar.goto(done)
        except Exception as e:
            log.warn('Cannot count: %s', e)
    bar.finish()
Project: defplorex    Author: trendmicro
def search(self, **kwargs):
        q = kwargs.get('q', '*')
        sort = kwargs.get('sort', 'timestamp')
        search_after = kwargs.get('search_after')
        size = kwargs.get('size', 50)
        source = kwargs.get('source')
        extra = dict(
                size=size)

        if search_after:
            extra.update(dict(search_after=search_after))

        s = Search(using=self.client, index=self.index_name)
        if source:
            s = s.source(source)
        s = s.sort(sort)
        s = s.query(Q('query_string', query=q))
        s = s.extra(**extra)

        log.info('Query: %s', s.to_dict())

        r = s.execute()
        count = r.hits.total
        took = r.took

        result = r, count, took

        return result
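
The search_after value passed above must be the sort values of the last hit of the previous page. A hedged paging sketch, where backend is a hypothetical instance of the class defining search():

after = None
while True:
    results, count, took = backend.search(q='*', sort='timestamp',
                                          size=50, search_after=after)
    if not results.hits:
        break
    for hit in results:
        print(hit.meta.id)
    # seed the next page with the sort values of the last hit
    after = list(results.hits[-1].meta.sort)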
Project: correctiv-nursinghomes    Author: correctiv
def search_reports(state, must_terms, should_terms):
    s = Report.search()
    q = Q('bool',
        must=[Q('match', body=term) for term in must_terms],
        should=[Q('match', body=term) for term in should_terms],
        minimum_should_match=1
    )
    s = s.filter('terms', state=[state]).query(q)
    response = s.execute()
    return response.to_dict()['hits']['hits']
Project: micromasters    Author: mitodl
def test_create_search_obj_filter(self, is_advance_search_capable):
        """
        Test that Search objects are created with program-limiting and filled_out=True query parameters
        """
        user = self.user if is_advance_search_capable else self.learner
        search_obj = create_search_obj(user)
        search_query_dict = search_obj.to_dict()
        expected_program_query = Q(
            'bool',
            should=[
                Q('term', **{'program.id': self.program.id})
            ],
            minimum_should_match=1,
            must=[
                Q('term', **{'program.is_learner': True})
            ]
        )
        expected_filled_out_query = Q('term', **{'profile.filled_out': True})
        expected_privacy_query = ~Q('term', **{'profile.account_privacy': 'private'})
        assert 'query' in search_query_dict
        assert 'bool' in search_query_dict['query']
        assert 'filter' in search_query_dict['query']['bool']
        assert len(search_query_dict['query']['bool']['filter']) == (2 if is_advance_search_capable else 3)
        expected_filters = [
            expected_program_query.to_dict(),
            expected_filled_out_query.to_dict(),
        ]
        if not is_advance_search_capable:
            expected_filters.insert(0, expected_privacy_query.to_dict())
        assert search_query_dict['query']['bool']['filter'] == expected_filters
Project: micromasters    Author: mitodl
def create_program_limit_query(user, staff_program_ids, filter_on_email_optin=False):
    """
    Constructs and returns a query that limits a user to data for their allowed programs

    Args:
        user (django.contrib.auth.models.User): A user
        staff_program_ids (list of int): the list of program ids the user is staff for if any
        filter_on_email_optin (bool): If true, filter out profiles where email_optin != true

    Returns:
        elasticsearch_dsl.query.Q: An elasticsearch query
    """
    users_allowed_programs = get_searchable_programs(user, staff_program_ids)
    # if the user cannot search any program, raise an exception.
    # in theory this should never happen because `UserCanAdvanceSearchPermission`
    # takes care of doing the same check, but better to keep it to avoid
    # that a theoretical bug exposes all the data in the index
    if not users_allowed_programs:
        raise NoProgramAccessException()

    must = [
        Q('term', **{'program.is_learner': True})
    ]

    if filter_on_email_optin:
        must.append(Q('term', **{'profile.email_optin': True}))

    # no matter what the query is, limit the programs to the allowed ones
    # if this is a superset of what searchkit sends, this will not impact the result
    return Q(
        'bool',
        should=[
            Q('term', **{'program.id': program.id}) for program in users_allowed_programs
        ],
        # require that at least one program id matches the user's allowed programs
        minimum_should_match=1,
        must=must,
    )
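
For reference, a query built this way serializes to roughly the following body; the sketch below uses invented program ids:

from elasticsearch_dsl import Q

q = Q(
    'bool',
    should=[Q('term', **{'program.id': 1}), Q('term', **{'program.id': 2})],
    minimum_should_match=1,
    must=[Q('term', **{'program.is_learner': True})],
)
print(q.to_dict())
# {'bool': {'should': [{'term': {'program.id': 1}}, {'term': {'program.id': 2}}],
#           'minimum_should_match': 1,
#           'must': [{'term': {'program.is_learner': True}}]}}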
Project: WASE    Author: thomaspatzke
def query(s, q):
    s.query = Q("query_string", query=q)
    return s

### Main ###
Project: django-rest-elasticsearch    Author: myarik
def get_es_query(self, s_query, s_fields, **kwargs):
        """ Create and return elasticsearch Query.

        You can override this method to create your own custom
        search Query object.

        Arguments:
            s_query: request search param
            s_fields: search fields

        Keyword arguments:
            request: request object
            view: view object
        """
        return Q("multi_match", query=s_query, fields=s_fields)
Project: freshonions-torscraper    Author: dirtyfilthy
def elasticsearch_retrieve_page_by_id(page_id):
    query = Search().filter(Q("term", nid=int(page_id)))[:1]
    result = query.execute()
    if result.hits.total == 0:
        return None
    return result.hits[0]
Project: freshonions-torscraper    Author: dirtyfilthy
def elasticsearch_delete_old():
    _from = NEVER
    _to   = datetime.now() - timedelta(days=30)
    query = Search().filter(Q("range", visited_at={'from': _from, 'to': _to}))
    result = query.delete()
Project: freshonions-torscraper    Author: dirtyfilthy
def elasticsearch_pages(context, sort, page):
    result_limit = int(os.environ['RESULT_LIMIT'])
    max_result_limit = int(os.environ['MAX_RESULT_LIMIT'])
    start = (page - 1) * result_limit
    end   = start + result_limit
    domain_query = Q("term", is_banned=False)
    if context["is_up"]:
        domain_query = domain_query & Q("term", is_up=True)
    if not context["show_fh_default"]:
        domain_query = domain_query & Q("term", is_crap=False)
    if not context["show_subdomains"]:
        domain_query = domain_query & Q("term", is_subdomain=False)
    if context["rep"] == "genuine":
        domain_query = domain_query & Q("term", is_genuine=True)
    if context["rep"] == "fake":
        domain_query = domain_query & Q("term", is_fake=True)



    limit = max_result_limit if context["more"] else result_limit

    has_parent_query = Q("has_parent", type="domain", query=domain_query)
    if context['phrase']:
        query = Search().filter(has_parent_query).query(Q("match_phrase", body_stripped=context['search']))
    else:
        query = Search().filter(has_parent_query).query(Q("match", body_stripped=context['search']))

    query = query.highlight_options(order='score', encoder='html').highlight('body_stripped')[start:end]
    query = query.source(['title','domain_id','created_at', 'visited_at']).params(request_cache=True)

    if   context["sort"] == "onion":
        query = query.sort("_parent")
    elif context["sort"] == "visited_at":
        query = query.sort("-visited_at")
    elif context["sort"] == "created_at":
        query = query.sort("-created_at")
    elif context["sort"] == "last_seen":
        query = query.sort("-visited_at")

    return query.execute()
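
The has_parent clause above matches child documents whose parent domain document satisfies domain_query. A minimal sketch of the body it produces:

from elasticsearch_dsl import Q

domain_query = Q('term', is_banned=False) & Q('term', is_up=True)
hp = Q('has_parent', type='domain', query=domain_query)
print(hp.to_dict())
# {'has_parent': {'type': 'domain',
#                 'query': {'bool': {'must': [{'term': {'is_banned': False}},
#                                             {'term': {'is_up': True}}]}}}}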
Project: open-ledger    Author: creativecommons
def test_query(self):
        """It should be possible to query the search engine for results"""
        q = Q("match", title="greyhounds")
        s = self.s.query(q)
        r = s.execute()
        self.assertEqual(0, r.hits.total)  # We haven't indexed anything, so no results are expected
Project: open-ledger    Author: creativecommons
def test_search(self):
        """It should be possible to find an item by query"""
        self._index_img(self.img1)
        s = self.s.query(Q("match", title="greyhounds"))
        r = s.execute()
        self.assertEquals(1, r.hits.total)
Project: open-ledger    Author: creativecommons
def test_remove_from_search_after_sync(self):
        """When an image is removed from the source, it should be removed from the search engine"""
        self._index_img(self.removed)
        s = self.s.query(Q("match", title="removed"))
        r = s.execute()
        self.assertEquals(1, r.hits.total)
        with responses.RequestsMock() as rsps:
            rsps.add(responses.HEAD, FOREIGN_URL + TEST_IMAGE_REMOVED, status=404)
            self.removed.sync()
        signals._update_search_index(self.removed)
        self.es.indices.refresh()
        s = self.s.query(Q("match", title="removed"))
        r = s.execute()
        self.assertEquals(0, r.hits.total)
Project: jamdb    Author: CenterForOpenScience
def get(self, handler):
        # Rip the search object out of the elasticsearch backend
        sort = handler.sort
        search = self.collection._state._backend.raw_backend().search

        if handler.request.query_arguments.get('q'):
            search = search.query(elasticsearch_dsl.Q('query_string', query=handler.request.query_arguments['q'][-1].decode('utf-8')))
        else:
            # This should technically be elsewhere but the search object
            # does not provide a nice way to figure out if there is a query or not.
            search = search.sort({'ref': {
                'order': 'asc',
                'unmapped_type': 'string'
            }})

        if handler.request.query_arguments.get('sort'):
            search = search.sort({sort.key: {
                'order': 'asc' if sort.order == 1 else 'desc',
                'unmapped_type': 'string'
            }})

        # Hacking into the serializer
        handler._serializer = self.get_serializer()
        handler._view.parents = handler._view.parents + (self.collection,)

        start = handler.page * handler.page_size
        wrapper = SearchResultWrapper(search[start:start + handler.page_size])
        return handler.write({
            'meta': {
                'total': wrapper.count(),
                'perPage': handler.page_size
            },
            # TODO
            'links': {},
            'data': [handler.serialize(resource) for resource in wrapper]
        })
Project: masterthesis-search-interface    Author: welil
def get_results(search_object, searched_fields, value):
    search_query = value

    if not search_query:
        q = Q()  # no search term given: match all documents
    else:
        q = Q("multi_match", query=search_query, fields=searched_fields, operator='and')

    search_object = search_object.query(q)[0:SEARCH_RESULTS_PER_PAGE]

    if search_query:
        for searched_field in searched_fields:
            search_object = search_object.highlight(searched_field)

    return search_object
Project: nxtool-ng    Author: nbs-system
def add_filters(self, filters, regexp=False, negative=False):
        """
        Add `filters` to the query.
        `filters` is a dict of the form {'field': value, 'field2': value2}, but you can also use a list of
        values instead of a `str`; multiple values are combined with a logical _or_ (not an _and_).
        :param dict filters:
        :param bool regexp:
        :param bool negative:
        :return:
        """
        # We need to use multi_match, since we get the fields names dynamically.
        for key, value in filters.items():
            if isinstance(value, set):
                value = list(value)

            # There is no need to process empty values.
            if not value:
                continue

            if isinstance(value, list):
                if negative:
                    self.search = self.search.query(Q('bool', must_not=[
                        reduce(operator.or_, [Q('multi_match', query=v, fields=[key]) for v in value])])
                    )
                else:
                    self.search = self.search.query(Q('bool', must=[
                        reduce(operator.or_, [Q('multi_match', query=v, fields=[key]) for v in value])])
                    )
            else:
                if negative:
                    self.search = self.search.query(~Q("multi_match", query=value, fields=[key]))
                else:
                    self.search = self.search.query(Q("multi_match", query=value, fields=[key]))
Project: photo-manager    Author: karih
def filters(self, args):
        if self.name in args and args[self.name] == "":
            return esd.Q('missing', field=self.name)
        else:
            return esd.Q('terms', **{self.name : [args.get(self.name), ]})
Project: photo-manager    Author: karih
def filters(self, args):
        if self.name in args and len(args[self.name]) > 0:
            return esd.Q('prefix', **{self.name : args.get(self.name), })
        else:
            return super().filters(args)
Project: photo-manager    Author: karih
def filters(self):
        range = {}
        if self.value[0] is not None:
            range["from"] = self.value[0]
        if self.value[1] is not None:
            range["to"] = self.value[1]
        return esd.Q('range', **{self.name : range})
Project: userline    Author: THIBER-ORG
def get_dsl_logoff_query(screen):
    q = None
    for evtid in config.EVENTS_LOGOFF:
        tmp = Q("match",event_identifier=evtid)
        if q is None:
            q = tmp
        else:
            q = q | tmp

    if screen is True:
        for evtid in config.EVENTS_LOGOFF_SCREEN:
            q = q | Q("match",event_identifier=evtid)

    return q
Project: userline    Author: THIBER-ORG
def get_dsl_logon_query(screen):
    q = None
    for evtid in config.EVENTS_LOGON:
        tmp = Q("match",event_identifier=evtid)
        if q is None:
            q = tmp
        else:
            q = q | tmp

    if screen is True:
        for evtid in config.EVENTS_LOGON_SCREEN:
            q = q | Q("match",event_identifier=evtid)

    return q
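
Since the loop above only ORs match clauses on a single field, a terms query is a likely equivalent shortcut; this assumes event_identifier is a keyword or numeric field, because terms does no analysis:

from elasticsearch_dsl import Q

event_ids = [4624, 4648]  # invented ids for illustration
q = Q('terms', event_identifier=event_ids)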
Project: userline    Author: THIBER-ORG
def get_logout_event(index,logonid,timestamp,maxtstamp,screen):
    """
    Look for the logoff event belonging to the given logon id or a shutdown event.
    """
    conn = connections.get_connection()

    # workaround to fix timestamp precision issues
    timestamp = timestamp - 999

    logoff = get_dsl_logoff_query(screen)
    q = [ \
        Q('match',data_type='windows:evtx:record') , \
        Q('match',xml_string=logonid) , \
        logoff \
    ]

    s = Search(using=conn, index=index).query(Q('bool',must=q)).filter('range',datetime={'gte':timestamp,'lte':maxtstamp}).sort('-datetime')
    res = s.execute()
    try:
        evt = res[0]
    except:
        evt = None

    if evt is None:
        q = [ Q('match',event_identifier=config.EVENT_SHUTDOWN) ]
        s = Search(using=conn, index=index).query(Q('bool',must=q)).filter('range',datetime={'gte':timestamp,'lte':maxtstamp}).sort('-datetime')
        res = s.execute()
        try:
            evt = res[0]
        except:
            evt = None

    return evt
Project: userline    Author: THIBER-ORG
def get_last_shutdown(index,maxtstamp,pattern):
    """
    Look for the last shutdown event
    """

    conn = connections.get_connection()

    q = [ \
        Q('match',data_type='windows:evtx:record') , \
        Q('match',event_identifier=config.EVENT_SHUTDOWN)
    ]

    if pattern:
        q.append(Q('query_string',query=pattern,analyze_wildcard=True))

    s = Search(using=conn, index=index).query(Q('bool',must=q)).filter('range',datetime={'lte':maxtstamp}).sort('-datetime')[0:0]
    s.aggs.bucket('computer','terms',field='computer_name.keyword').bucket('shutdown','top_hits',size=1)

    res = s.execute()
    ret = {}
    for item in res.aggregations['computer']['buckets']:
        ret[item['key']] = item['shutdown']['hits']['hits'][0]

    if len(ret.keys()) == 0:
        ret = None

    return ret
Project: userline    Author: THIBER-ORG
def get_last_event(index,computer=None,maxdate=None,pattern=None):
    conn = connections.get_connection()
    q = [ \
        Q('match',data_type='windows:evtx:record')
    ]

    if computer is not None:
        q.append(Q('match',computer_name=computer))

    if pattern:
        q.append(Q('query_string',query=pattern,analyze_wildcard=True))

    if maxdate:
        s = Search(using=conn, index=index).query(Q('bool',must=q)).filter('range',datetime={'lte': maxdate}).sort('-datetime')
    else:
        s = Search(using=conn, index=index).query(Q('bool',must=q)).sort('-datetime')

    if computer is None:
        s = s[0:0]
        s.aggs.bucket('computer','terms',field='computer_name.keyword').bucket('last','top_hits',size=1)

    res = s.execute()

    if computer is None:
        evt = {}
        for item in res.aggregations['computer']['buckets']:
            evt[item['key']] = item['last']['hits']['hits'][0]

        if len(evt.keys()) == 0:
            evt = None
    else:
        try:
            evt = res[0]
        except:
            evt = None

    return evt
Project: stethoscope    Author: Netflix
def create_query_for_email(self, search, email):
    return search.query(elasticsearch_dsl.Q({"match": {'email': email}}))
Project: defplorex    Author: trendmicro
def paginate(self, index, q='*', limit=None, size=None, id_only=True):
        if not size:
            size = self.bulk_size

        log.info('Limit %s, size %s (q = "%s")', limit, size, q)

        s = Search(
                using=self.client,
                index=index,
                doc_type=self.doc_type)
        s = s.query(Q('query_string', query=q))

        if limit:
            size = min(size, limit)
            s = s.extra(size=size)

        s = s.params(
                scroll='20m',
                size=size)

        if id_only:
            s = s.source(False)

        log.debug('Query: %s', simplejson.dumps(s.to_dict(), indent=2))

        hits = []
        overall = 0

        for h in s.scan():
            if limit is not None and overall >= limit:
                return  # PEP 479: raising StopIteration inside a generator becomes a RuntimeError

            log.debug('Hit: %s (progress: %d)', h.meta.id, overall)
            if not limit or overall < limit:
                if id_only:
                    hits.append(h.meta.id)
                else:
                    hits.append(h.to_dict())

                if len(hits) == size:
                    yield iter(hits)
                    hits = []
                    overall += size

        if len(hits):
            yield iter(hits)
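
paginate yields batches (iterators) of at most size hits. A hedged usage sketch, where backend is a hypothetical instance of the defining class and the index name and query are invented:

for batch in backend.paginate(index='tweets', q='status:active', limit=1000, size=200):
    ids = list(batch)
    print('got %d ids' % len(ids))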
Project: Stream4Flow    Author: CSIRT-MU
def get_records_list():
    """
    Obtains a list of all records for the given type and time range.

    :return: JSON with status "ok" or "error" and requested data.
    """

    # Check login
    if not session.logged:
        json_response = '{"status": "Error", "data": "You must be logged!"}'
        return json_response

    # Check mandatory inputs
    if not (request.get_vars.beginning and request.get_vars.end and request.get_vars.type):
        json_response = '{"status": "Error", "data": "Some mandatory argument is missing!"}'
        return json_response

    # Parse inputs and set correct format
    beginning = escape(request.get_vars.beginning)
    end = escape(request.get_vars.end)
    type = escape(request.get_vars.type)

    try:
        # Elastic query
        client = elasticsearch.Elasticsearch(
            [{'host': myconf.get('consumer.hostname'), 'port': myconf.get('consumer.port')}])
        elastic_bool = []
        elastic_bool.append({'range': {'@timestamp': {'gte': beginning, 'lte': end}}})
        elastic_bool.append({'term': {'@stat_type': type}})

        # Prepare query
        qx = Q({'bool': {'must': elastic_bool}})

        # Set query according to the statistic type
        search_ip = Search(using=client, index='_all').query(qx)
        search_ip.aggs.bucket('all_nested', 'nested', path='data_array')\
            .bucket('by_key', 'terms', field='data_array.key.raw', size=2147483647)\
            .bucket('stats_sum', 'sum', field='data_array.value')
        results = search_ip.execute()

        data = ""
        for all_buckets in results.aggregations.all_nested.by_key:
            data += all_buckets.key + "," + str(int(all_buckets.stats_sum.value)) + ","

        # Remove trailing comma
        data = data[:-1]

        json_response = '{"status": "Ok", "data": "' + data + '"}'
        return json_response

    except Exception as e:
        json_response = '{"status": "Error", "data": "Exception: ' + escape(str(e)) + '"}'
        return json_response
Project: Stream4Flow    Author: CSIRT-MU
def get_host_flows():
    """
    Gets flows, packets, and bytes time series for a given host

    Returns: JSON with status "ok" or "error" and requested data.

    """
    # Check login
    if not session.logged:
        json_response = '{"status": "Error", "data": "You must be logged!"}'
        return json_response

    # Check mandatory inputs
    if not (request.get_vars.beginning and request.get_vars.end and request.get_vars.aggregation and request.get_vars.host_ip):
        json_response = '{"status": "Error", "data": "Some mandatory argument is missing!"}'
        return json_response

    # Parse inputs and set correct format
    beginning = escape(request.get_vars.beginning)
    end = escape(request.get_vars.end)
    aggregation = escape(request.get_vars.aggregation)
    host_ip = escape(request.get_vars.host_ip)

    try:
        # Elastic query
        client = elasticsearch.Elasticsearch(
            [{'host': myconf.get('consumer.hostname'), 'port': myconf.get('consumer.port')}])
        elastic_bool = []
        elastic_bool.append({'range': {'@timestamp': {'gte': beginning, 'lte': end}}})
        elastic_bool.append({'term': {'src_ip': host_ip}})

        qx = Q({'bool': {'must': elastic_bool}})
        s = Search(using=client, index='_all').query(qx)
        s.aggs.bucket('by_time', 'date_histogram', field='@timestamp', interval=aggregation) \
              .metric('sum_of_flows', 'sum', field='stats.total.flow') \
              .metric('sum_of_packets', 'sum', field='stats.total.packets') \
              .metric('sum_of_bytes', 'sum', field='stats.total.bytes')

        result = s.execute()

        data = "Timestamp,Number of flows,Number of packets,Number of bytes;"
        for record in result.aggregations.by_time.buckets:
            timestamp = record.key
            number_of_flows = int(record.sum_of_flows.value)
            number_of_packets = int(record.sum_of_packets.value)
            number_of_bytes = int(record.sum_of_bytes.value)

            data += str(timestamp) + "," + str(number_of_flows) + "," + str(number_of_packets) + "," + str(number_of_bytes) + ";"

        json_response = '{"status": "Ok", "data": "' + data + '"}'
        return (json_response)

    except Exception as e:
        json_response = '{"status": "Error", "data": "Elasticsearch query exception: ' + escape(str(e)) + '"}'
        return json_response
Project: Stream4Flow    Author: CSIRT-MU
def get_host_distinct_ports():
    """
    Gets a time series of distinct destination port counts for a given host

    Returns: JSON with status "ok" or "error" and requested data.

    """
    # Check login
    if not session.logged:
        json_response = '{"status": "Error", "data": "You must be logged!"}'
        return json_response

    # Check mandatory inputs
    if not (request.get_vars.beginning and request.get_vars.end and request.get_vars.aggregation and request.get_vars.host_ip):
        json_response = '{"status": "Error", "data": "Some mandatory argument is missing!"}'
        return json_response

    # Parse inputs and set correct format
    beginning = escape(request.get_vars.beginning)
    end = escape(request.get_vars.end)
    aggregation = escape(request.get_vars.aggregation)
    host_ip = escape(request.get_vars.host_ip)

    try:
        # Elastic query
        client = elasticsearch.Elasticsearch(
            [{'host': myconf.get('consumer.hostname'), 'port': myconf.get('consumer.port')}])
        elastic_bool = []
        elastic_bool.append({'range': {'@timestamp': {'gte': beginning, 'lte': end}}})
        elastic_bool.append({'term': {'src_ip': host_ip}})

        qx = Q({'bool': {'must': elastic_bool}})
        s = Search(using=client, index='_all').query(qx)
        s.aggs.bucket('by_time', 'date_histogram', field='@timestamp', interval=aggregation) \
              .metric('dport_avg', 'avg', field='stats.dport_count') \
              .metric('dport_max', 'max', field='stats.dport_count') \
              .metric('dport_min', 'min', field='stats.dport_count')

        result = s.execute()

        data_avg = []
        data_min_max = []
        data_max = []
        data_min = []
        for record in result.aggregations.by_time.buckets:
            timestamp = record.key
            maximum = round(record.dport_max.value, 2) if record.dport_max.value else None
            minimum = round(record.dport_min.value, 2) if record.dport_min.value else None
            data_avg.append([timestamp,round(record.dport_avg.value,2) if record.dport_avg.value else None])
            data_min_max.append([timestamp,[minimum, maximum ]])
            data_max.append(maximum)
            data_min.append(minimum)

        json_response = {"status": "Ok", "data":{ "data_avg": data_avg, "data_min_max": data_min_max, "data_min": data_min, "data_max": data_max}}
        return (json.dumps(json_response))

    except Exception as e:
        json_response = '{"status": "Error", "data": "Elasticsearch query exception: ' + escape(str(e)) + '"}'
        return json_response
Project: Stream4Flow    Author: CSIRT-MU
def get_host_distinct_peers():
    """
    Gets a time series of distinct peer counts for a given host

    Returns: JSON with status "ok" or "error" and requested data.

    """
    # Check login
    if not session.logged:
        json_response = '{"status": "Error", "data": "You must be logged!"}'
        return json_response

    # Check mandatory inputs
    if not (request.get_vars.beginning and request.get_vars.end and request.get_vars.aggregation and request.get_vars.host_ip):
        json_response = '{"status": "Error", "data": "Some mandatory argument is missing!"}'
        return json_response

    # Parse inputs and set correct format
    beginning = escape(request.get_vars.beginning)
    end = escape(request.get_vars.end)
    aggregation = escape(request.get_vars.aggregation)
    host_ip = escape(request.get_vars.host_ip)

    try:
        # Elastic query
        client = elasticsearch.Elasticsearch(
            [{'host': myconf.get('consumer.hostname'), 'port': myconf.get('consumer.port')}])
        elastic_bool = []
        elastic_bool.append({'range': {'@timestamp': {'gte': beginning, 'lte': end}}})
        elastic_bool.append({'term': {'src_ip': host_ip}})

        qx = Q({'bool': {'must': elastic_bool}})
        s = Search(using=client, index='_all').query(qx)
        s.aggs.bucket('by_time', 'date_histogram', field='@timestamp', interval=aggregation) \
              .metric('peer_avg', 'avg', field='stats.peer_number') \
              .metric('peer_max', 'max', field='stats.peer_number') \
              .metric('peer_min', 'min', field='stats.peer_number')

        result = s.execute()

        data_avg = []
        data_min_max=[]
        data_max = []
        data_min = []
        for record in result.aggregations.by_time.buckets:
            timestamp = record.key
            maximum = round(record.peer_max.value, 2) if record.peer_max.value else None
            minimum = round(record.peer_min.value, 2) if record.peer_min.value else None
            data_avg.append([timestamp, round(record.peer_avg.value, 2) if record.peer_avg.value else None])
            data_min_max.append([timestamp, [minimum, maximum]])
            data_max.append(maximum)
            data_min.append(minimum)

        json_response = {"status": "Ok", "data":{ "data_avg": data_avg, "data_min_max": data_min_max, "data_min": data_min, "data_max": data_max}}
        return (json.dumps(json_response))

    except Exception as e:
        json_response = '{"status": "Error", "data": "Elasticsearch query exception: ' + escape(str(e)) + '"}'
        return json_response
Project: Stream4Flow    Author: CSIRT-MU
def get_records_list():
    """
    Obtains a list of all records for the given type and time range.

    :return: JSON with status "ok" or "error" and requested data.
    """

    # Check login
    if not session.logged:
        json_response = '{"status": "Error", "data": "You must be logged!"}'
        return json_response

    # Check mandatory inputs
    if not (request.get_vars.beginning and request.get_vars.end and request.get_vars.filter):
        json_response = '{"status": "Error", "data": "Some mandatory argument is missing!"}'
        return json_response

    # Parse inputs and set correct format
    beginning = escape(request.get_vars.beginning)
    end = escape(request.get_vars.end)
    filter = escape(request.get_vars.filter)

    try:
        # Elastic query
        client = elasticsearch.Elasticsearch(
            [{'host': myconf.get('consumer.hostname'), 'port': myconf.get('consumer.port')}])
        elastic_bool = []
        elastic_bool.append({'range': {'@timestamp': {'gte': beginning, 'lte': end}}})
        elastic_bool.append({'term': {'@type': 'external_dns_resolver'}})

        # Set filter
        if filter != 'none':
            elastic_should = []
            elastic_should.append({'term': {'src_ip': filter}})
            elastic_should.append({'term': {'resolver_ip.raw': filter}})
            elastic_bool.append({'bool': {'should': elastic_should}})
        qx = Q({'bool': {'must': elastic_bool}})

        # Search with maximum size aggregations
        search = Search(using=client, index='_all').query(qx)
        search.aggs.bucket('by_src', 'terms', field='src_ip', size=2147483647)\
              .bucket('by_dst', 'terms', field='resolver_ip.raw', size=2147483647)\
              .bucket('top_src_dst', 'top_hits', size=1, sort=[{'timestamp': {'order': 'desc'}}])
        results = search.execute()

        # Result Parsing into CSV in format: timestamp, source_ip, resolver_ip, flows
        data = ""
        for src_aggregations in results.aggregations.by_src.buckets:
            for result in src_aggregations.by_dst.buckets:
                record = result.top_src_dst.hits.hits[0]["_source"]
                data += record["timestamp"].replace("T", " ").replace("Z", "") + "," + record["src_ip"] + "," \
                    + record["resolver_ip"] + "," + str(record["flows"]) + ","
        data = data[:-1]

        json_response = '{"status": "Ok", "data": "' + data + '"}'
        return json_response

    except Exception as e:
        json_response = '{"status": "Error", "data": "Exception: ' + escape(str(e)) + '"}'
        return json_response
Project: mordecai    Author: openeventdata
def query_geonames(self, placename):
        """
        Wrap search parameters into an elasticsearch query to the geonames index
        and return results.

        Parameters
        ----------
        placename: str
            the placename text extracted by the NER system

        Returns
        -------
        out: The raw results of the elasticsearch query
        """
        # first, try for a country name
        if self.is_country(placename):
            q = {"multi_match": {"query": placename,
                                 "fields": ['name', 'asciiname', 'alternativenames'],
                                 "type": "phrase"}}
            r = Q("match", feature_code='PCLI')
            res = self.conn.query(q).query(r)[0:5].execute()  # always 5
            #self.country_exact = True

        else:
            # second, try for an exact phrase match
            q = {"multi_match": {"query": placename,
                                 "fields": ['name^5', 'asciiname^5', 'alternativenames'],
                                 "type": "phrase"}}

            res = self.conn.query(q)[0:50].execute()

            # if no results, use some fuzziness, but still require all terms to be present.
            # Fuzzy is not allowed in "phrase" searches.
            if res.hits.total == 0:
                # tried wrapping this in a {"constant_score" : {"query": ... but made it worse
                q = {"multi_match": {"query": placename,
                                     "fields": ['name', 'asciiname', 'alternativenames'],
                                     "fuzziness": 1,
                                     "operator": "and"}}
                #self.fuzzy = True  # idea was to preserve this info as a feature, but not using state like this
                res = self.conn.query(q)[0:50].execute()


        es_result = utilities.structure_results(res)
        return es_result
Project: micromasters    Author: mitodl
def create_search_obj(user, search_param_dict=None, filter_on_email_optin=False):
    """
    Creates a search object and prepares it with metadata and query parameters that
    we want to apply for all ES requests

    Args:
        user (User): User object
        search_param_dict (dict): A dict representing the body of an ES query
        filter_on_email_optin (bool): If true, filter out profiles where email_optin != True

    Returns:
        Search: elasticsearch_dsl Search object
    """
    staff_program_ids = get_advance_searchable_program_ids(user)
    is_advance_search_capable = bool(staff_program_ids)
    search_obj = Search(index=get_default_alias(), doc_type=_get_search_doc_types(is_advance_search_capable))
    # Update from search params first so our server-side filtering will overwrite it if necessary
    if search_param_dict is not None:
        search_obj.update_from_dict(search_param_dict)

    if not is_advance_search_capable:
        # Learners can't search for other learners with privacy set to private
        search_obj = search_obj.filter(
            ~Q('term', **{'profile.account_privacy': Profile.PRIVATE})
        )

    # Limit results to one of the programs the user is staff on
    search_obj = search_obj.filter(create_program_limit_query(
        user,
        staff_program_ids,
        filter_on_email_optin=filter_on_email_optin
    ))
    # Filter so that only filled_out profiles are seen
    search_obj = search_obj.filter(
        Q('term', **{'profile.filled_out': True})
    )
    # Force size to be the one we set on the server
    update_dict = {'size': settings.ELASTICSEARCH_DEFAULT_PAGE_SIZE}
    if search_param_dict is not None and search_param_dict.get('from') is not None:
        update_dict['from'] = search_param_dict['from']
    search_obj.update_from_dict(update_dict)

    return search_obj
Project: web    Author: pyjobs
def index(self, query=None, radius=None, center=None, sort_by=None,
              *args, **kwargs):
        if not query and not radius and not center:
            redirect('/jobs')

        search_query = JobElastic().search()
        relevance_sort = sort_by == 'scores'

        if query:
            keyword_queries = self._compute_keyword_queries(query)
            decay_functions = self._compute_decay_functions()

            search_query.query = Q(
                'function_score',
                query=keyword_queries,
                functions=decay_functions
            )
        else:
            relevance_sort = False

        try:
            geoloc_query = json.loads(center)
            coordinates = geoloc_query['coordinates']
            lat, lon = (coordinates['lat'], coordinates['lon'])
        except (ValueError, TypeError):
            # One of the following cases has occurred:
            #     - center wasn't a valid JSON string
            #     - radius couldn't be converted to float
            # Since both pieces of information are required to set a
            # geolocation filter, we ignore it.
            pass
        else:
            search_query = self._apply_geolocation_filters(
                search_query, (lat, lon), radius if radius else 5.0)

        date_sort = not relevance_sort

        if date_sort:
            search_query = self._apply_date_sort(search_query)

        return dict(sources=SOURCES, jobs=PaginatedSearch(search_query),
                    job_offer_search_form=JobsResearchForm)
Project: ssp-campaigns    Author: bloogrox
def get_subscribers(self, targetings, hours_whitelist, volume):
        logger.info("SubscriberService.get_subscribers: getting subscribers")
        start_time = time.time()
        timezones = [tz for tz in pytz.all_timezones
                     if (datetime
                         .now(pytz.timezone(tz)).hour
                         in hours_whitelist)]

        targetings.append({
            "field": "unsub",
            "operator": "NOT IN",
            "values": [1, "true"]
        })
        if timezones:
            targetings.append({
                "field": "timezone",
                "operator": "IN",
                "values": timezones
            })
        s = Search(using=es, index="users")
        operator_mappings = {
            'IN': 'must',
            'NOT IN': 'must_not',
        }

        q = Q()
        for condition in targetings:
            condition_pair = {condition["field"]: condition["values"]}
            terms_q = Q('terms', **condition_pair)
            bool_operator = operator_mappings[condition['operator']]
            bool_q = Q('bool', **{bool_operator: terms_q})
            q += bool_q
        s = s.query(q)
        s.query = dslq.FunctionScore(
            query=s.query,
            functions=[dslq.SF('random_score')],
            boost_mode="replace"
            )
        s = s[:volume]
        try:
            res = s.execute()
        except Exception as e:
            logger.error(f"SubscriberService.get_subscribers: Exception {e}")
        else:
            subscribers = []
            for row in res.hits:
                subscriber = row.to_dict()
                subscriber['_id'] = row.meta.id
                subscribers.append(subscriber)
            end_time = time.time()
            logger.debug(f"SubscriberService.get_subscribers: finished in "
                         f"{int((end_time - start_time) * 1000)}ms")
            return subscribers