Python elasticsearch_dsl module: Search() example source code

The following 50 code examples, extracted from open-source Python projects, illustrate how to use elasticsearch_dsl.Search().
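Before the individual excerpts, here is a minimal, self-contained sketch of the common usage pattern (the host, index, and field names are placeholders, not values from any of the projects below):

from elasticsearch import Elasticsearch
from elasticsearch_dsl import Q, Search

client = Elasticsearch(['localhost:9200'])  # placeholder host

# Search objects are built lazily; nothing is sent to the cluster
# until execute() or scan() is called.
s = Search(using=client, index='my-index') \
    .query(Q('match', title='python')) \
    .sort('-timestamp')[0:10]

response = s.execute()
for hit in response:
    print(hit.meta.id, hit.title)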

Project: micromasters | Author: mitodl
def execute_search(search_obj):
    """
    Executes a search against ES after checking the connection

    Args:
        search_obj (Search): elasticsearch_dsl Search object

    Returns:
        elasticsearch_dsl.result.Response: ES response
    """
    # make sure there is a live connection
    if search_obj._index is None:  # pylint: disable=protected-access
        # If you're seeing this it means you're creating Search() without using
        # create_search_obj which sets important fields like the index and doc_type.
        raise ImproperlyConfigured("search object is missing an index")

    get_conn()
    return search_obj.execute()
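The comment above refers to a project helper, create_search_obj, that is not part of this excerpt. A minimal sketch of what such a factory might look like (the index and doc_type values are illustrative assumptions, not micromasters' actual configuration):

from elasticsearch_dsl import Search

def create_search_obj_sketch(index='micromasters', doc_type='program_user'):
    """Hypothetical factory: binds the Search to an index and doc_type,
    so the _index check in execute_search() passes."""
    return Search(index=index, doc_type=doc_type)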
Project: source-code-to-name | Author: zironycho
def first_words(index='codetoname', language='python'):
    es = elasticsearch.Elasticsearch()

    # update first name
    s = elasticsearch_dsl.Search(using=es, index=index, doc_type=language)\
        .query('bool', filter=Q('exists', field='feature') & Q('missing', field='first_name'))
    for hit in s.scan():
        data = hit.to_dict()
        feature = json.loads(data['feature'])
        data['first_name'] = firstname(feature['name'], language)
        es.index(index=index, doc_type=language, id=hit.meta.id, body=data)
    es.indices.refresh(index=index)

    # aggregation
    s = elasticsearch_dsl.Search(using=es, index=index, doc_type=language)\
        .query('bool', filter=Q('exists', field='feature'))
    a = A('terms', field='first_name')
    s.aggs.bucket('first_name_terms', a)
    response = s.execute()

    words = []
    for item in response.aggregations.first_name_terms.buckets:
        percentage = item.doc_count / float(response.hits.total) * 100
        words.append({'word': item.key, 'percentage': percentage})
    return words
Project: Stream4Flow | Author: CSIRT-MU
def get_summary_statistics():
    """
    Obtains statistics about current sum of flows, packets, bytes.

    :return: JSON with status "ok" or "error" and requested data.
    """

    try:
        # Elastic query
        client = elasticsearch.Elasticsearch([{'host': myconf.get('consumer.hostname'), 'port': myconf.get('consumer.port')}])
        elastic_bool = []
        elastic_bool.append({'range': {'@timestamp': {'gte': "now-5m", 'lte': "now"}}})
        elastic_bool.append({'term': {'@type': 'protocols_statistics'}})

        qx = Q({'bool': {'must': elastic_bool}})
        s = Search(using=client, index='_all').query(qx)
        s.aggs.bucket('sum_of_flows', 'sum', field='flows')
        s.aggs.bucket('sum_of_packets', 'sum', field='packets')
        s.aggs.bucket('sum_of_bytes', 'sum', field='bytes')
        s = s.sort('@timestamp')
        result = s.execute()

        # Parse the result into CSV in the format: timestamp, flows, packets, bytes
        data = "Timestamp, Flows, Packets, Bytes;"
        timestamp = "Last 5 Minutes"
        data += timestamp + ', ' +\
                str(int(result.aggregations.sum_of_flows['value'])) + ', ' +\
                str(int(result.aggregations.sum_of_packets['value'])) + ', ' +\
                str(int(result.aggregations.sum_of_bytes['value']))

        json_response = '{"status": "Ok", "data": "' + data + '"}'
        return json_response

    except Exception as e:
        json_response = '{"status": "Error", "data": "Elasticsearch query exception: ' + escape(str(e)) + '"}'
        return json_response
Project: falcon-api | Author: Opentopic
def filter_by(self, query, conditions, order_criteria=None):
        """
        :param query: Search object
        :type query: elasticsearch_dsl.Search

        :param conditions: conditions dictionary
        :type conditions: dict

        :param order_criteria: optional order criteria
        :type order_criteria: list

        :return: modified query
        :rtype: elasticsearch_dsl.Search
        """
        expressions = self._build_filter_expressions(conditions, None)
        if expressions is None:
            return query
        if order_criteria and '_score' not in order_criteria and '-_score' not in order_criteria:
            return query.update_from_dict({'query': {'constant_score': {'filter': expressions}}})
        return query.update_from_dict({'query': expressions})
Project: mordecai | Author: openeventdata
def setup_es(es_ip, es_port):
    """
    Setup an Elasticsearch connection

    Parameters
    ----------
    es_ip: string
            IP address for elasticsearch instance
    es_port: string
            Port for elasticsearch instance
    Returns
    -------
    S: an elasticsearch_dsl Search object bound to the 'geonames' index.
    """
    CLIENT = Elasticsearch([{'host' : es_ip, 'port' : es_port}])
    S = Search(using=CLIENT, index="geonames")
    return S
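A hedged usage example for the helper above (the host/port values and the 'name' field are assumptions about the geonames mapping):

# Sketch: resolve a place name against the 'geonames' index
S = setup_es('localhost', '9200')
results = S.query('match', name='Berlin')[0:5].execute()
for hit in results:
    print(hit.meta.id)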
Project: micromasters | Author: mitodl
def prepare_and_execute_search(user, search_param_dict=None, search_func=execute_search,
                               filter_on_email_optin=False):
    """
    Prepares a Search object and executes the search against ES

    Args:
        user (User): User object
        search_param_dict (dict): A dict representing the body of an ES query
        search_func (callable): The function that executes the search
        filter_on_email_optin (bool): If true, filter out profiles where email_optin != True

    Returns:
        elasticsearch_dsl.result.Response: ES response
    """
    search_obj = create_search_obj(
        user,
        search_param_dict=search_param_dict,
        filter_on_email_optin=filter_on_email_optin,
    )
    return search_func(search_obj)
Project: open-ledger | Author: creativecommons
def test_work_types_dont_override_provider(self):
        """[#122] Selecting work types should be a subset of providers, not override them"""
        img1 = models.Image.objects.create(url='example.com/1', title='hello', provider='flickr')
        img2 = models.Image.objects.create(url='example.com/2', title='hello', provider='nypl')
        self._index_img(img1)
        self._index_img(img2)

        # Search by provider=flickr but work type=cultural should limit by Flickr first
        resp = self.client.get(self.url, {'search_fields': 'title',
                                          'search': 'hello',
                                          'providers': 'flickr',
                                          'work_types': 'cultural'})

        # One result, the correct one
        self.assertEqual(1, len(select_nodes(resp, '.t-image-result')))
        # We now also have img[data-identifier], which is used by PhotoSwipe
        self.assertEqual(1, len(select_nodes(resp, 'div[data-identifier="' + img1.identifier +'"]')))
        self.assertEqual(0, len(select_nodes(resp, 'div[data-identifier="' + img2.identifier +'"]')))
Project: open-ledger | Author: creativecommons
def about(request):
    """Information about the current site, its goals, and what content is loaded"""
    # Provider counts
    providers = cache.get_or_set(CACHE_STATS_NAME, [], CACHE_STATS_DURATION)
    if not providers:
        for provider in sorted(settings.PROVIDERS.keys()):
            s = Search()
            q = Q('term', provider=provider)
            s = s.query(q)
            response = s.execute()
            if response.hits.total > 0:
                data = settings.PROVIDERS[provider]
                total = intcomma(response.hits.total)
                data.update({'hits': total})
                providers.append(data)
        # All results
        s = Search()
        response = s.execute()
        total = intcomma(response.hits.total)
        providers.append({'display_name': 'Total', 'hits': total})
        cache.set(CACHE_STATS_NAME, providers)
    return render(request, "about.html", {'providers': providers})
Project: open-ledger | Author: creativecommons
def correct_orphan_records(self, provider='europeana', end=None):
        """[#185] Delete records from the search engine which aren't found in the database"""
        s = Search()
        q = Q('term', provider=provider)
        s = s.query(q)
        response = s.execute()
        total = response.hits.total
        # A file extracted from the production database listing all of the europeana identifiers
        identifier_file = '/tmp/europeana-identifiers.json'
        db_identifiers = set(json.load(open(identifier_file)))
        total_in_db = len(db_identifiers)
        log.info("Using search engine instance %s", settings.ELASTICSEARCH_URL)
        log.info("Total records: %d (search engine), %d (database) [diff=%d]", total, total_in_db, total - total_in_db)
        deleted_count = 0
        for r in s.scan():
            if r.identifier not in db_identifiers:
                img = search.Image.get(id=r.identifier)
                log.debug("Going to delete image %s", img)
                deleted_count += 1
        log.info("Deleted %d from search engine", deleted_count)
Project: invenio-stats | Author: inveniosoftware
def test_filter_robots(app, es, event_queues, indexed_events, with_robots):
    """Test the filter_robots query modifier."""
    query_modifiers = []
    if not with_robots:
        query_modifiers = [filter_robots]
    StatAggregator(client=current_search_client,
                   event='file-download',
                   aggregation_field='file_id',
                   aggregation_interval='day',
                   query_modifiers=query_modifiers).run()
    current_search_client.indices.refresh(index='*')
    query = Search(
        using=current_search_client,
        index='stats-file-download',
        doc_type='file-download-day-aggregation'
    )[0:30].sort('file_id')
    results = query.execute()
    assert len(results) == 3
    for result in results:
        if 'file_id' in result:
            assert result.count == (5 if with_robots else 2)
Project: invenio-stats | Author: inveniosoftware
def _get_oldest_event_timestamp(self):
        """Search for the oldest event timestamp."""
        # Retrieve the oldest event in order to start aggregation
        # from there
        query_events = Search(
            using=self.client,
            index=self.event_index
        )[0:1].sort(
            {'timestamp': {'order': 'asc'}}
        )
        result = query_events.execute()
        # There might not be any events yet, e.g. if the first events have
        # been indexed but the indices have not been refreshed yet.
        if len(result) == 0:
            return None
        return parser.parse(result[0]['timestamp'])
Project: handelsregister | Author: Amsterdam
def to_elasticsearch_object(self, client) -> Search:
        assert self.indexes

        search = (
            Search()
            .using(client)
            .index(*self.indexes)
            .query(self.query)
        )
        if self.sort_fields:
            search = search.sort(*self.sort_fields)

        size = 15  # default size
        if self.size:
            size = self.size

        search = search[0:size]

        return search
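Since the excerpt shows to_elasticsearch_object as a plain function taking self, a simple namespace can stand in for the real query object in a usage sketch (all values below are illustrative assumptions, not the project's actual configuration):

from types import SimpleNamespace

from elasticsearch import Elasticsearch
from elasticsearch_dsl import Q

client = Elasticsearch()  # placeholder connection

spec = SimpleNamespace(
    indexes=['handelsregister'],        # placeholder index name
    query=Q('match', naam='bakkerij'),  # placeholder query
    sort_fields=['naam'],
    size=10,
)
search = to_elasticsearch_object(spec, client)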
Project: TrainerDex-RedCog | Author: TrainerDex
def pokedex(self, pokemon):
        s = Search(using=self.client, index="pokemon").query("match", name={'query': pokemon, 'fuzziness': 2})
        response = s.execute()
        if response.hits.total == 0:
            await self.bot.say("I couldn't find that pokemon")
            return
        hit = response[0]
        embed=discord.Embed(title=hit.name, url="http://bulbapedia.bulbagarden.net/wiki/{}".format(hit.name), timestamp=(datetime.datetime(2017,7,6)))
        embed.set_thumbnail(url="http://serebii.net/pokemongo/pokemon/{:03d}.png".format(int(hit.meta.id)))
        embed.add_field(name='Base Attack Stat', value=hit.attack_ratio)
        embed.add_field(name='Base Defence Stat', value=hit.defense_ratio)
        embed.add_field(name='Base HP Stat', value=hit.hp_ratio)
        embed.add_field(name='Min CP', value=hit.min_cp_cap)
        embed.add_field(name='Max CP', value=hit.max_cp_cap)
        embed.add_field(name='Best Offensive Moveset', value=hit.basic_attack+' / '+hit.charge_attack)
        #embed.add_field(name='Basic Atk', value=hit.basic_attack)
        #embed.add_field(name='Quick DPS', value=hit.quick_dps)
        #embed.add_field(name='Charge Atk', value=hit.charge_attack)
        #embed.add_field(name='Charge DPS', value=hit.charge_dps)
        #embed.add_field(name='Offensive %', value=hit.offensive_percent)
        #embed.add_field(name='Duel %', value=hit.duel_percent)
        #embed.add_field(name='Defensive %', value=hit.defensive_percent)
        #embed.add_field(name='Full Cycle DPS', value=hit.full_cycle_dps)
        embed.set_footer(text='Min and Max CP are for level 40. Best Offensive Moveset may be incorrect.')
        await self.bot.say(embed=embed)
Project: data-store | Author: HumanCellAtlas
def find(replica: str):
    owner = request.token_info['email']
    es_client = ElasticsearchClient.get(logger)

    search_obj = Search(using=es_client,
                        index=Config.get_es_index_name(ESIndexType.subscriptions, Replica[replica]),
                        doc_type=ESDocType.subscription.name)
    search = search_obj.query({'match': {'owner': owner}})

    responses = [{
        'uuid': hit.meta.id,
        'replica': replica,
        'owner': owner,
        'callback_url': hit.callback_url,
        'es_query': hit.es_query.to_dict()}
        for hit in search.scan()]

    full_response = {'subscriptions': responses}
    return jsonify(full_response), requests.codes.okay
Project: series-tiempo-ar-api | Author: datosgobar
def test_missing_field_update(self):
        """When a distribution is updated and a previously indexed field
        is missing, the previously indexed data is not deleted.
        """
        missing_field = '212.1_PSCIOS_ERS_0_0_22'

        self._index_catalog('full_ts_data.json')
        # Segunda corrida, 'actualización' del catálogo
        self._index_catalog('missing_field.json')

        results = Search(using=self.elastic,
                         index=self.test_index) \
            .filter('match', series_id=missing_field).execute()

        self.assertTrue(len(results))
        self.assertTrue(Field.objects.filter(series_id=missing_field))
Project: stethoscope | Author: Netflix
def _get_notifications_by_email(self, email):
    search = elasticsearch_dsl.Search(using=self.client, index=self.config['ELASTICSEARCH_INDEX'],
      doc_type=self.config['ELASTICSEARCH_DOCTYPE'])

    query = self.create_query_for_email(search, email)

    # logger.debug("query:\n{!s}", pprint.pformat(query.to_dict()))

    try:
      response = query.execute()
    except elasticsearch.exceptions.ElasticsearchException:
      logger.exception("Exception caught in Elasticsearch query:\n  index: {!r}\n  doc_type: {!r}\n"
                       "  query: {!s}".format(self.config['ELASTICSEARCH_INDEX'],
                         self.config['ELASTICSEARCH_DOCTYPE'], pprint.pformat(query.to_dict())))
      raise  # re-raise: `response` would otherwise be unbound below

    # logger.debug("response:\n{!s}", pprint.pformat(response.to_dict()))

    return response.hits.hits
Project: fccforensics | Author: RagtagOpen
def tag_by_email(self, emails, breached):
        docs = []
        s = Search(using=self.es).\
            filter(Q({'terms': {'contact_email.keyword': emails}})).\
            source(['id_submission'])
        print('%s emails breached=%s' % (len(emails), breached))
        for hit in s.scan():
            docs.append(lib.bulk_update_doc(hit['id_submission'], {'breached': breached}))
            if not len(docs) % 500:
                print('\tfetched %s' % len(docs))
        print('\t%s matches' % len(docs))
        return docs
Project: fccforensics | Author: RagtagOpen
def run(self):
        emails = {
            'breached': set(),
            'unbreached': set(),
        }
        # contact_email exists
        must = [Q('exists', field='contact_email')]
        # matches source if specified
        if self.source:
            must.append(Q({'term': {'analysis.source': self.source}}))
        # not already tagged with breached
        s = Search(using=self.es).\
            query(FunctionScore(
                  query=Q('bool',
                          must=must,
                          must_not=[Q('exists', field='analysis.breached')]),
                  functions=[SF('random_score', seed=int(time.time()))]
            )).\
            source(['contact_email'])
        print('%s breached: source=%s limit=%s' % (datetime.now().isoformat(), self.source,
            self.limit))
        print('query=\n%s' % json.dumps(s.to_dict()))
        for filing in s[:self.limit]:
            email = filing['contact_email']
            if not email or email in emails['breached'] or email in emails['unbreached']:
                continue
            breached = self.is_breached(email)
            emails['breached' if breached else 'unbreached'].add(email)
        docs = []
        print('done source=%s' % self.source)
        if emails['breached']:
            docs += self.tag_by_email(list(emails['breached']), True)
        if emails['unbreached']:
            docs += self.tag_by_email(list(emails['unbreached']), False)
        try:
            lib.bulk_update(self.es, docs)
        except Exception as e:
            print('error indexing: %s' % e)
Project: defplorex | Author: trendmicro
def monitor(index, delta, query_string):
    click.clear()

    def cnt():
        q = Q('query_string', query=query_string)
        s = Search(
                using=es.client,
                index=index).query(q)
        return s.count()

    N = cnt()
    tot = Search(using=es.client, index=index).count()

    if not delta:
        N = tot

    log.info('Processing %d records (total: %d)', N, tot)

    click.echo('You can exit with CTRL-C; results will still be processed')

    bar = SlowOverallFancyBar('', max=N, grand_total=tot)
    try:
        while True:
            time.sleep(5.0)
            try:
                n = cnt()
                if isinstance(n, int):
                    if delta:
                        done = N - n
                    else:
                        done = n
                    bar.goto(done)
            except Exception as e:
                log.warning('Cannot count: %s', e)
    except KeyboardInterrupt:
        pass  # CTRL-C stops monitoring; results keep processing server-side
    bar.finish()
Project: defplorex | Author: trendmicro
def clone_index(use_helper, from_index, to_index):
    """Clone an index"""
    from elasticsearch_dsl import Search
    from elasticsearch.helpers import reindex

    click.clear()

    if not es.client.indices.exists(index=from_index):
        click.secho('{} does not exist!'.format(from_index), fg='red')
        return 1

    if es.client.indices.exists(index=to_index):
        cnt = Search(using=es.client, index=to_index).count()
        message = 'Index %s already exists (%d records). Overwrite?' % (
                to_index, cnt)
        click.confirm(message, abort=True)

    if use_helper:
        reindex(
                client=es.client,
                source_index=from_index,
                target_index=to_index)
    else:
        es.client.reindex(
                body=dict(
                    source=dict(index=from_index),
                    dest=dict(index=to_index)),
                wait_for_completion=False)
Project: defplorex | Author: trendmicro
def monitor_clone_index(from_index, to_index):
    """Monitor the size of an index"""
    from elasticsearch_dsl import Search

    click.clear()

    cnt = Search(using=es.client, index=from_index).count()

    bar = SlowFancyBar('', max=cnt)
    while True:
        time.sleep(2.0)
        _cnt = Search(using=es.client, index=to_index).count()
        bar.goto(_cnt)
        if _cnt >= cnt:
            break
    bar.finish()
Project: defplorex | Author: trendmicro
def search(self, **kwargs):
        q = kwargs.get('q', '*')
        sort = kwargs.get('sort', 'timestamp')
        search_after = kwargs.get('search_after')
        size = kwargs.get('size', 50)
        source = kwargs.get('source')
        extra = dict(
                size=size)

        if search_after:
            extra.update(dict(search_after=search_after))

        s = Search(using=self.client, index=self.index_name)
        if source:
            s = s.source(source)
        s = s.sort(sort)
        s = s.query(Q('query_string', query=q))
        s = s.extra(**extra)

        log.info('Query: %s', s.to_dict())

        r = s.execute()
        count = r.hits.total
        took = r.took

        return r, count, took
Project: defplorex | Author: trendmicro
def count(self, index, query):
        try:
            s = Search(
                    using=self.client,
                    index=index,
                    doc_type=self.doc_type). \
                            update_from_dict(query)
            log.info('Querying: %s', s.to_dict())

            return s.count()
        except Exception as e:
            log.warning('Cannot count: %s', e)
Project: django-elasticsearch-dsl-drf | Author: barseghyanartur
def __init__(self, *args, **kwargs):
        assert self.document is not None

        self.client = connections.get_connection()
        self.index = self.document._doc_type.index
        self.mapping = self.document._doc_type.mapping.properties.name
        self.search = Search(using=self.client, index=self.index)
        super(BaseDocumentViewSet, self).__init__(*args, **kwargs)
Project: falcon-api | Author: Opentopic
def get_base_query(self, req, resp):
        return Search(using=self.connection,
                      index=self.objects_class._doc_type.index,
                      doc_type=self.objects_class)
Project: falcon-api | Author: Opentopic
def test_filter_by(connection, query_filtered):
    """
    Test the `filter_by` method
    """
    conditions, expected = query_filtered
    if isinstance(conditions, str):
        conditions = json.loads(conditions, object_pairs_hook=OrderedDict)
    if isinstance(expected, str):
        expected = json.loads(expected)
    c = CollectionResource(objects_class=Model, connection=connection)
    query_obj = c.filter_by(Search(using=connection).doc_type(Model), conditions)
    assert query_obj.to_dict()['query'] == expected
Project: falcon-api | Author: Opentopic
def test_order_by(connection, query_ordered):
    """
    Test ordering a query with `sort`
    """
    conditions, expected = query_ordered
    if isinstance(conditions, str):
        conditions = json.loads(conditions, object_pairs_hook=OrderedDict)
    if isinstance(expected, str):
        expected = json.loads(expected)
    query_obj = Search(using=connection, doc_type=Model).sort(*conditions)
    assert query_obj.to_dict() == expected
Project: falcon-api | Author: Opentopic
def test_totals(connection, query_totals):
    """
    Test the `_build_total_expressions` method
    """
    totals, expected = query_totals
    if isinstance(totals, str):
        totals = json.loads(totals, object_pairs_hook=OrderedDict)
    if isinstance(expected, str):
        expected = json.loads(expected)
    c = CollectionResource(objects_class=Model, connection=connection)
    query_obj = c._build_total_expressions(Search(using=connection).doc_type(Model), totals)
    assert query_obj.to_dict() == expected
Project: micromasters | Author: mitodl
def search_for_field(search_obj, field_name, page_size=DEFAULT_ES_LOOP_PAGE_SIZE):
    """
    Retrieves all unique instances of a field for documents that match an ES query

    Args:
        search_obj (Search): Search object
        field_name (str): The name of the field for the value to get
        page_size (int): Number of docs per page of results

    Returns:
        set: Set of unique values
    """
    results = set()
    # Maintaining a consistent sort on '_doc' will help prevent bugs where the
    # index is altered during the loop.
    # This also limits the query to only return the field value.
    search_obj = search_obj.sort('_doc').fields(field_name)
    loop = 0
    all_results_returned = False
    while not all_results_returned:
        from_index = loop * page_size
        to_index = from_index + page_size
        search_results = execute_search(search_obj[from_index: to_index])
        # add the field value for every search result hit to the set
        for hit in search_results.hits:
            results.add(getattr(hit, field_name)[0])
        all_results_returned = to_index >= search_results.hits.total
        loop += 1
    return results
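A minimal usage sketch of the function above (the index name is a placeholder; assumes a default connection has been registered with elasticsearch_dsl.connections):

from elasticsearch_dsl import Search

# Collect every distinct email appearing in documents matching the query
search_obj = Search(index='micromasters')  # placeholder index
emails = search_for_field(search_obj, 'email', page_size=100)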
Project: micromasters | Author: mitodl
def get_all_query_matching_emails(search_obj, page_size=DEFAULT_ES_LOOP_PAGE_SIZE):
    """
    Retrieves all unique emails for documents that match an ES query

    Args:
        search_obj (Search): Search object
        page_size (int): Number of docs per page of results

    Returns:
        set: Set of unique emails
    """
    return search_for_field(search_obj, "email", page_size=page_size)
Project: pandachaika | Author: pandabuilder
def title_suggest_view(request: HttpRequest) -> HttpResponse:
    query = request.GET.get('q', '')
    s = Search(using=es_client, index=es_index_name) \
        .source(['title']) \
        .query("match", title_suggest={'query': query, 'operator': 'and', 'fuzziness': 'AUTO'})
    response = s.execute()

    data = json.dumps(
        [{'id': i.meta.id, 'value': i.title} for i in response]
    )
    mime_type = 'application/json; charset=utf-8'
    return HttpResponse(data, mime_type)
Project: django-rest-elasticsearch | Author: myarik
def get_es_search(self):
        if self.es_model is None:
            msg = "Cannot use %s on a view which does not have the 'es_model'"
            raise ImproperlyConfigured(msg % self.__class__.__name__)
        index = self.es_model()._get_index()
        es_client = self.get_es_client()
        s = Search(using=es_client, index=index, doc_type=self.es_model)
        return s
Project: freshonions-torscraper | Author: dirtyfilthy
def elasticsearch_retrieve_page_by_id(page_id):
    query = Search().filter(Q("term", nid=int(page_id)))[:1]
    result = query.execute()
    if result.hits.total == 0:
        return None
    return result.hits[0]
Project: freshonions-torscraper | Author: dirtyfilthy
def elasticsearch_delete_old():
    _from = NEVER
    _to   = datetime.now() - timedelta(days=30)
    query = Search().filter(Q("range", visited_at={'from': _from, 'to': _to}))
    result = query.delete()
Project: freshonions-torscraper | Author: dirtyfilthy
def elasticsearch_pages(context, sort, page):
    result_limit = int(os.environ['RESULT_LIMIT'])
    max_result_limit = int(os.environ['MAX_RESULT_LIMIT'])
    start = (page - 1) * result_limit
    end   = start + result_limit
    domain_query = Q("term", is_banned=False)
    if context["is_up"]:
        domain_query = domain_query & Q("term", is_up=True)
    if not context["show_fh_default"]:
        domain_query = domain_query & Q("term", is_crap=False)
    if not context["show_subdomains"]:
        domain_query = domain_query & Q("term", is_subdomain=False)
    if context["rep"] == "genuine":
        domain_query = domain_query & Q("term", is_genuine=True)
    if context["rep"] == "fake":
        domain_query = domain_query & Q("term", is_fake=True)

    limit = max_result_limit if context["more"] else result_limit

    has_parent_query = Q("has_parent", type="domain", query=domain_query)
    if context['phrase']:
        query = Search().filter(has_parent_query).query(Q("match_phrase", body_stripped=context['search']))
    else:
        query = Search().filter(has_parent_query).query(Q("match", body_stripped=context['search']))

    query = query.highlight_options(order='score', encoder='html').highlight('body_stripped')[start:end]
    query = query.source(['title','domain_id','created_at', 'visited_at']).params(request_cache=True)

    if context["sort"] == "onion":
        query = query.sort("_parent")
    elif context["sort"] == "visited_at":
        query = query.sort("-visited_at")
    elif context["sort"] == "created_at":
        query = query.sort("-created_at")
    elif context["sort"] == "last_seen":
        query = query.sort("-visited_at")

    return query.execute()
Project: open-ledger | Author: creativecommons
def setUp(self):
        super().setUp()
        self.es = search.init_es()
        connections.add_connection('default', self.es)
        self.s = Search(index=settings.ELASTICSEARCH_INDEX)
        search.Image.init()

        self.es.cluster.health(wait_for_status='yellow', request_timeout=2000)
        self.img1 = models.Image(title='greyhounds are fast',
                                 creator="Rashid",
                                 url='http://example.com/1',
                                 license='CC0',
                                 provider="flickr",
                                 source="openimages",
                                 tags_list=['greyhound', 'dog', 'object'])
        self.img2 = models.Image(title='pumpkins are orange',
                                 creator='???',
                                 url='http://example.com/2',
                                 license='CC-BY',
                                 provider="rijksmuseum",
                                 source="rijksmuseum",
                                 tags_list=['gourds', 'fruit', 'object'])
        self.img1.save()
        self.img2.save()
        self.url = reverse('index')
        self.removed = models.Image.objects.create(title='removed', url=FOREIGN_URL + TEST_IMAGE_REMOVED, license="cc0")
Project: invenio-stats | Author: inveniosoftware
def test_date_range(app, es, event_queues, indexed_events):
    aggregate_events(['file-download-agg'])
    current_search_client.indices.refresh(index='*')

    query = Search(using=current_search_client,
                   index='stats-file-download')[0:30].sort('file_id')
    results = query.execute()

    total_count = 0
    for result in results:
        if 'file_id' in result:
            total_count += result.count
    assert total_count == 30
Project: invenio-stats | Author: inveniosoftware
def get_bookmark(self):
        """Get last aggregation date."""
        if not Index(self.aggregation_alias,
                     using=self.client).exists():
            if not Index(self.event_index,
                         using=self.client).exists():
                return datetime.date.today()
            return self._get_oldest_event_timestamp()

        # retrieve the most recent bookmark
        query_bookmark = Search(
            using=self.client,
            index=self.aggregation_alias,
            doc_type='{0}-bookmark'.format(self.event)
        )[0:1].sort(
            {'date': {'order': 'desc'}}
        )
        bookmarks = query_bookmark.execute()
        # if no bookmark is found but the index exists, the bookmark was
        # somehow lost or never written, so restart from the beginning
        if len(bookmarks) == 0:
            return self._get_oldest_event_timestamp()

        # change it to doc_id_suffix
        bookmark = datetime.datetime.strptime(bookmarks[0].date,
                                              self.doc_id_suffix)
        return bookmark
Project: invenio-stats | Author: inveniosoftware
def build_query(self, start_date, end_date, **kwargs):
        """Build the elasticsearch query."""
        agg_query = Search(using=self.client,
                           index=self.index,
                           doc_type=self.doc_type)[0:0]
        if start_date is not None or end_date is not None:
            time_range = {}
            if start_date is not None:
                time_range['gte'] = start_date.isoformat()
            if end_date is not None:
                time_range['lte'] = end_date.isoformat()
            agg_query = agg_query.filter(
                'range',
                **{self.time_field: time_range})

        term_agg = agg_query.aggs
        for term in self.aggregated_fields:
            term_agg = term_agg.bucket(term, 'terms', field=term, size=0)
        term_agg.metric('total', 'sum', field='count')

        if self.copy_fields:
            term_agg.metric(
                'top_hit', 'top_hits', size=1, sort={'timestamp': 'desc'}
            )

        for query_param, filtered_field in self.required_filters.items():
            if query_param in kwargs:
                agg_query = agg_query.filter(
                    'term', **{filtered_field: kwargs[query_param]}
                )

        return agg_query
Project: scrapy-cdr | Author: TeamHG-Memex
def main():
    parser = argparse.ArgumentParser(description='Download items from ES index')
    arg = parser.add_argument
    arg('output', help='output in .jl.gz format')
    arg('index', help='ES index name')
    arg('--domain', help='url.domain to filter')
    arg('--id', help='record id')
    arg('--host', default='localhost', help='ES host in host[:port] format')
    arg('--user', help='HTTP Basic Auth user')
    arg('--password', help='HTTP Basic Auth password')
    arg('--chunk-size', type=int, default=100, help='download chunk size')

    args = parser.parse_args()
    kwargs = {}
    if args.user or args.password:
        kwargs['http_auth'] = (args.user, args.password)

    client = elasticsearch.Elasticsearch(
        [args.host],
        connection_class=elasticsearch.RequestsHttpConnection,
        timeout=600,
        **kwargs)
    print(client.info())

    search = Search(using=client, index=args.index)
    if args.domain:
        search = search.filter('term', **{'url.domain': args.domain})
    if args.id:
        search = search.filter('term', **{'_id': args.id})

    total = 0
    with tqdm.tqdm(total=search.count()) as pbar:
        with gzip.open(args.output, 'wt') as f:
            for x in search.params(size=args.chunk_size).scan():
                total += 1
                pbar.update(1)
                f.write(json.dumps(x.to_dict()))
                f.write('\n')

    print('{:,} items downloaded to {}'.format(total, args.output))
Project: handelsregister | Author: Amsterdam
def determine_metadata(self, request, view):
        result = super().determine_metadata(request, view)
        result['parameters'] = {
            'q': {
                'type': 'string',
                'description': 'The query to search for',
                'required': True,
            },
        }
        return result


# =============================================
# Search view sets
# =============================================
Project: handelsregister | Author: Amsterdam
def search_query(self, client, analyzer: InputQAnalyzer) -> Search:
        """
        Construct the search query that is executed by this view set.
        """
        raise NotImplementedError
Project: handelsregister | Author: Amsterdam
def search_query(self, client, analyzer: InputQAnalyzer) -> Search:
        """
        Execute search on Vestiging
        """
        search = vestiging_query(analyzer)\
            .to_elasticsearch_object(client)
        return search.filter('terms', _type=['vestiging'])
Project: handelsregister | Author: Amsterdam
def search_query(self, client, analyzer: InputQAnalyzer) -> Search:
        """
        Execute search on Maatschappelijke Activiteit
        """
        search = mac_query(analyzer).to_elasticsearch_object(client)
        return search
Project: jamdb | Author: CenterForOpenScience
def __init__(self, index, doc_type, connection=None):
        self._connection = connection or ElasticsearchBackend.DEFAULT_CONNECTION
        self._index = index
        self._doc_type = doc_type
        self._connection.indices.create(self._index, ignore=400)
        self._connection.indices.put_mapping(body={doc_type: self.ES_MAPPING}, index=index, doc_type=doc_type)
        self.search = elasticsearch_dsl.Search(self._connection, index=index, doc_type=doc_type)
Project: TrainerDex-RedCog | Author: TrainerDex
def find_gym(self, gym):
        s = Search(using=self.client, index="marker").query("match", title={'query': gym, 'fuzziness': 2, 'slop': 1})
        response = s.execute()
        if response.hits.total == 0:
            await self.bot.say("I couldn't find that gym")
            return None, None
        hit = response[0]
        monacle_gym = await self.get_monacle_gym(hit)
        return hit, monacle_gym
Project: source-code-to-name | Author: zironycho
def exists_repos_in_database(self, github_id):
        if 0 != elasticsearch_dsl \
                .Search(using=self._es, index=self._es_index, doc_type=self._language) \
                .query('term', repo__github_id=github_id) \
                .count():
            return True
        return False
Project: source-code-to-name | Author: zironycho
def num_repos(self):
        if self._es.indices.exists(index=self._es_index):
            s = elasticsearch_dsl.Search(using=self._es, index=self._es_index, doc_type=self._language)
            s.aggs.bucket('num_repos', A('cardinality', field='repo.github_id'))
            response = s.execute()
            return response.aggregations.num_repos.value
        return 0
Project: source-code-to-name | Author: zironycho
def get_features(self):
        if self._es.indices.exists(index=self._es_index):
            s = elasticsearch_dsl.Search(using=self._es, index=self._es_index, doc_type=self._language)
            response = s.execute()
            if 0 != len(response.hits):
                return response.hits
        return False
Project: TweetSets | Author: justinlittman
def update_dataset_stats(dataset):
    search = Search(index=get_tweets_index_name(dataset.meta.id))
    search = search.query('term', dataset_id=dataset.meta.id)[0:0]
    search.aggs.metric('created_at_min', 'min', field='created_at')
    search.aggs.metric('created_at_max', 'max', field='created_at')
    search_response = search.execute()
    dataset.first_tweet_created_at = datetime.utcfromtimestamp(
        search_response.aggregations.created_at_min.value / 1000.0)
    dataset.last_tweet_created_at = datetime.utcfromtimestamp(
        search_response.aggregations.created_at_max.value / 1000.0)
    dataset.tweet_count = search_response.hits.total
    dataset.save()