Python elasticsearch module: Elasticsearch() example source code

The following 47 code examples, extracted from open-source Python projects, show how to use elasticsearch.Elasticsearch().

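Before the project-specific examples, here is a minimal sketch of the pattern they all share: create a client, index a document, and query it back. The host, index name, and sample document are illustrative assumptions, not taken from any of the listed projects; the index/doc_type/body keyword style matches the older elasticsearch-py API used throughout these examples.

from elasticsearch import Elasticsearch

# Assumed local node; the projects below substitute their own hosts, auth and timeouts.
es = Elasticsearch(['http://localhost:9200'], timeout=30)

# Index a sample document and make it searchable immediately.
es.index(index='demo-index', doc_type='doc', id=1, body={'title': 'hello world'})
es.indices.refresh(index='demo-index')

# Run a simple match query and print the raw hit count.
result = es.search(index='demo-index', body={'query': {'match': {'title': 'hello'}}})
print(result['hits']['total'])
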
Project: fccforensics | Author: RagtagOpen
def __init__(self, lte=None, gte=None, limit=250, sort='date_disseminated,ASC', fastout=False, verify=True, endpoint='http://127.0.0.1/', start_offset=0):
        if gte and not lte:
            lte = datetime.now().isoformat()
        if lte and not gte:
            gte = '2000-01-01'
        self.lte = lte
        self.gte = gte
        self.limit = limit
        self.sort = sort
        self.fastout = fastout
        self.verify = verify
        self.endpoint = endpoint
        self.fcc_endpoint = 'https://ecfsapi.fcc.gov/filings'
        self.index_fields = mappings.FIELDS.keys()
        self.es = Elasticsearch(self.endpoint, timeout=30)
        self.start_offset = start_offset
        self.stats = {'indexed': start_offset, 'fetched': start_offset}
Project: defplorex | Author: trendmicro
def __init__(self, settings):
        kwargs = settings.get('es').get('client')
        es_user = settings.get('es_user')
        es_pass = settings.get('es_pass')

        if es_user and es_pass:
            kwargs.update(**dict(http_auth=(es_user, es_pass)))

        self.client = Elasticsearch(**kwargs)
        self.timeout = settings.get('es').get('client').get('timeout')
        self.doc_type = settings.get('es').get('doc_type')
        self.index_name = settings.get('es').get('index')
        self.id_field = settings.get('id_field')
        self.bulk_size = settings.get('bulk_size', 1000)
        self.path_encoding = settings.get('path_encoding')

        self.actions = []

        log.debug('ESStorer instance created: %s', self.client)
Project: nhs-crawler | Author: snava10
def run(args):
    elasticsearchServer = args[0] if len(args) else 'localhost:9200'
    indexName = 'nhs_conditions'
    docType = 'condition'

    es = Elasticsearch(elasticsearchServer)
    es.indices.delete(index=indexName, ignore=[400,404])

    f = open('nhsPageContent','w')
    f.write('[')
    for model in get_pages_info_models('http://www.nhs.uk/Conditions/Pages/hub.aspx'):
        json = model.to_json()
        es.index(index=indexName, doc_type=docType, body=json)
        f.write(json + ",\n")

    f.write(']')
    f.close()
    es.indices.refresh(index=indexName)
Project: Stream4Flow | Author: CSIRT-MU
def get_summary_statistics():
    """
    Obtains statistics about current sum of flows, packets, bytes.

    :return: JSON with status "ok" or "error" and requested data.
    """

    try:
        # Elastic query
        client = elasticsearch.Elasticsearch([{'host': myconf.get('consumer.hostname'), 'port': myconf.get('consumer.port')}])
        elastic_bool = []
        elastic_bool.append({'range': {'@timestamp': {'gte': "now-5m", 'lte': "now"}}})
        elastic_bool.append({'term': {'@type': 'protocols_statistics'}})

        qx = Q({'bool': {'must': elastic_bool}})
        s = Search(using=client, index='_all').query(qx)
        s.aggs.bucket('sum_of_flows', 'sum', field='flows')
        s.aggs.bucket('sum_of_packets', 'sum', field='packets')
        s.aggs.bucket('sum_of_bytes', 'sum', field='bytes')
        s.sort('@timestamp')
        result = s.execute()

        # Result Parsing into CSV in format: timestamp, tcp protocol value, udp protocol value
        data = "Timestamp, Flows, Packets, Bytes;"
        timestamp = "Last 5 Minutes"
        data += timestamp + ', ' +\
                str(int(result.aggregations.sum_of_flows['value'])) + ', ' +\
                str(int(result.aggregations.sum_of_packets['value'])) + ', ' +\
                str(int(result.aggregations.sum_of_bytes['value']))

        json_response = '{"status": "Ok", "data": "' + data + '"}'
        return json_response

    except Exception as e:
        json_response = '{"status": "Error", "data": "Elasticsearch query exception: ' + escape(str(e)) + '"}'
        return json_response
Project: mordecai | Author: openeventdata
def setup_es(es_ip, es_port):
    """
    Setup an Elasticsearch connection

    Parameters
    ----------
    es_ip: string
            IP address for elasticsearch instance
    es_port: string
            Port for elasticsearch instance
    Returns
    -------
    es_conn: an elasticsearch_dsl Search connection object.
    """
    CLIENT = Elasticsearch([{'host' : es_ip, 'port' : es_port}])
    S = Search(using=CLIENT, index="geonames")
    return S
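A possible usage sketch for setup_es above: the returned elasticsearch_dsl Search object can be filtered, sliced and executed lazily. The host, port, field name and query value are illustrative assumptions, not part of the project.

# Hypothetical usage; 'name' is an assumed field in the geonames index.
s = setup_es('127.0.0.1', '9200')
response = s.query('match', name='Berlin')[:5].execute()
for hit in response:
    print(hit.meta.id, hit.name)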
Project: ceres | Author: dicortazar
def ESConnection():

    parser = configparser.ConfigParser()
    conf_file = 'settings'
    fd = open(conf_file, 'r')
    parser.readfp(fd)
    fd.close()

    sections = parser.sections()
    for section in sections:
        options = parser.options(section)
        for option in options:
            if option == 'user': user = parser.get(section, option)
            if option == 'password': password = parser.get(section, option)
            if option == 'host': host = parser.get(section, option)
            if option == 'port': port = parser.get(section, option)
            if option == 'path': path = parser.get(section, option)
            #if option == 'genderize_key': key = parser.get(section, option)


    connection = "https://" + user + ":" + password + "@" + host + ":" + port + "/" + path
    es_write = Elasticsearch([connection], verify_certs=False)

    #es_write = Elasticsearch(["127.0.0.1:9200"])
    return es_write
Project: valhalla | Author: LCOGT
def __init__(self, start, end, telescopes=None, sites=None, instrument_types=None):
        try:
            self.es = Elasticsearch([settings.ELASTICSEARCH_URL])
        except LocationValueError:
            logger.error('Could not find host. Make sure ELASTICSEARCH_URL is set.')
            raise ImproperlyConfigured('ELASTICSEARCH_URL')

        self.instrument_types = instrument_types
        self.available_telescopes = self._get_available_telescopes()

        sites = list({tk.site for tk in self.available_telescopes}) if not sites else sites
        telescopes = list({tk.telescope for tk in self.available_telescopes if tk.site in sites}) \
            if not telescopes else telescopes

        self.start = start.replace(tzinfo=timezone.utc).replace(microsecond=0)
        self.end = end.replace(tzinfo=timezone.utc).replace(microsecond=0)
        cached_event_data = cache.get('tel_event_data')
        if cached_event_data:
            self.event_data = cached_event_data
        else:
            self.event_data = self._get_es_data(sites, telescopes)
            cache.set('tel_event_data', self.event_data, 1800)
Project: panels | Author: grimoirelab
def __init__(self, host='localhost', port='9200', protocol='http', path=None, user=None, password=None):
        """ Class constructor

        :param host: ElasticSearch host domain
        :param port: ElasticSearch connection port
        :param protocol: ElasticSearch protocol (typically http or https)
        :param path: ElasticSearch connection path
        :param user: ElasticSearch connection user
        :param password: ElasticSearch connection password
        """

        credentials = ""
        if user is not None or password is not None:
            credentials = user + ":" + password + "@"
        if path is None:
            path = ""
        connection = protocol + "://" + credentials + host + ":" + port + path
        print(connection)
        self.es = Elasticsearch([connection])
Project: gremlinsdk-python | Author: ResilienceTesting
def __init__(self, host, test_id, debug=False):
        """
        param host: the elasticsearch host
        test_id: id of the test to which we are restricting the queries
        """
        self._es = Elasticsearch(host)
        self._id = test_id
        self.debug=debug
        self.functiondict = {
            'no_proxy_errors' : self.check_no_proxy_errors,
            'bounded_response_time' : self.check_bounded_response_time,
            'http_success_status' : self.check_http_success_status,
            'http_status' : self.check_http_status,
#            'reachability' : self.check_reachability,
            'bounded_retries' : self.check_bounded_retries,
            'circuit_breaker' : self.check_circuit_breaker,
            'at_most_requests': self.check_at_most_requests
        }
Project: oclubs | Author: SHSIDers
def get(doc_type, doc_id, fields=True):
    """
    Get an Elasticsearch document.

    :param basestring doc_type: document type
    :param doc_id: document id, will be converted into basestring
    :param fields: if ``False``, returns whether the document is found as bool;
        if ``True``, returns the document dict; if list of string, returns the
        document dict with only the specified fields.
    :rtype: dict or bool
    """
    ret = es.get(
        index='oclubs',
        doc_type=doc_type,
        id=doc_id,
        _source=fields
    )

    if fields is not False:
        return ret['_source']
    else:
        return ret['found']
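A brief usage sketch for the helper above; the document type, id, and field names are illustrative assumptions.

# Fetch only the selected fields of the document's _source (values assumed).
club_info = get('club', 42, fields=['name', 'description'])

# With fields=False the helper returns the document's 'found' flag instead of its source.
found = get('club', 42, fields=False)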
Project: mcp-watch | Author: bernard357
def use_store(self):
        """
        Opens a database to save data
        """

        logging.info('Using Elasticsearch database')

        self.db = Elasticsearch(
            [self.settings.get('host', 'localhost:9200')],
            )

        try:
            self.db.indices.create(index='mcp-watch', ignore=400) # may exist
        except ConnectionError as feedback:
            logging.error('- unable to connect')
            raise

        return self.db
Project: mcp-watch | Author: bernard357
def reset_store(self):
        """
        Opens a database for points
        """

        logging.info('Resetting Elasticsearch database')

        self.db = Elasticsearch(
            [self.settings.get('host', 'localhost:9200')],
            )

        try:
            self.db.indices.create(index='mcp-watch', ignore=400) # may exist
        except ConnectionError as feedback:
            logging.error('- unable to connect')
            raise

        return self.db
Project: browbeat | Author: openstack
def __init__(self, config, workload, tool="browbeat", cache_size=1000, max_cache_time=10):
        self.config = config
        self.cache = deque()
        self.max_cache_size = cache_size
        self.last_upload = datetime.datetime.utcnow()
        self.max_cache_age = datetime.timedelta(minutes=max_cache_time)
        self.logger = logging.getLogger('browbeat.elastic')
        self.es = elasticsearch.Elasticsearch([
            {'host': self.config['elasticsearch']['host'],
             'port': self.config['elasticsearch']['port']}],
            send_get_body_as='POST'
        )
        self.workload = workload
        today = datetime.datetime.today()
        self.index = "{}-{}-{}".format(tool,
                                       workload, today.strftime('%Y.%m.%d'))
Project: availability | Author: seecloud
def get_elasticsearch(check_availability=False):
    """Return Elasticsearch instance.

    :param check_availability: check if nodes are available
    :returns: Elasticsearch or None on failure
    :rtype: elasticsearch.Elasticsearch
    """
    nodes = config.get_config()["backend"]["connection"]
    try:
        es = elasticsearch.Elasticsearch(nodes)
        if check_availability:
            es.info()
    except Exception as e:
        LOG.warning(
            "Failed to query Elasticsearch nodes %s: %s"
            % (nodes, str(e)))
        raise
    return es
Project: open-ledger | Author: creativecommons
def init_es(timeout=TIMEOUT):
    log.info("connecting to %s %s", settings.ELASTICSEARCH_URL, settings.ELASTICSEARCH_PORT)
    auth = AWSRequestsAuth(aws_access_key=settings.AWS_ACCESS_KEY_ID,
                           aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY,
                           aws_host=settings.ELASTICSEARCH_URL,
                           aws_region='us-west-1',
                           aws_service='es')
    auth.encode = lambda x: bytes(x.encode('utf-8'))
    es = Elasticsearch(host=settings.ELASTICSEARCH_URL,
                       port=settings.ELASTICSEARCH_PORT,
                       connection_class=RequestsHttpConnection,
                       timeout=timeout,
                       max_retries=10, retry_on_timeout=True,
                       http_auth=auth)
    return es
Project: metastore | Author: datahq
def setUp(self):

        # Clean index
        self.es = Elasticsearch(hosts=[LOCAL_ELASTICSEARCH])
        try:
            self.es.indices.delete(index='datahub')
            self.es.indices.delete(index='events')
        except NotFoundError:
            pass
        self.es.indices.create('datahub')
        mapping = {'dataset': {'properties': self.MAPPING}}
        self.es.indices.put_mapping(doc_type='dataset',
                                    index='datahub',
                                    body=mapping)

        self.es.indices.create('events')
        mapping = {'event': {'properties': {'timestamp': {'type': 'date'}}}}
        self.es.indices.put_mapping(doc_type='event',
                                    index='events',
                                    body=mapping)
Project: elasticsearch2-haystack | Author: NDevox
def setup():
    try:
        from elasticsearch import Elasticsearch, ElasticsearchException
    except ImportError:
        raise unittest.SkipTest("elasticsearch-py not installed.")

    es = Elasticsearch(settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'])
    try:
        es.info()
    except ElasticsearchException as e:
        raise unittest.SkipTest("elasticsearch not running on %r" % settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'], e)

    global test_runner
    global old_config

    from django.test.runner import DiscoverRunner

    test_runner = DiscoverRunner()
    test_runner.setup_test_environment()
    old_config = test_runner.setup_databases()
Project: django-rest-search | Author: wemap
def __create_connection(self, config):
        kwargs = {
            'host': config['HOST'],
            'port': config.get('PORT', 9200),
            'use_ssl': config.get('USE_SSL', False),
            'verify_certs': True,
            'ca_certs': certifi.where()
        }

        if 'AWS_ACCESS_KEY' in config and \
           'AWS_SECRET_KEY' in config and \
           'AWS_REGION' in config:

            kwargs['connection_class'] = RequestsHttpConnection
            kwargs['http_auth'] = AWSRequestsAuth(
                aws_access_key=config['AWS_ACCESS_KEY'],
                aws_secret_access_key=config['AWS_SECRET_KEY'],
                aws_host=config['HOST'],
                aws_region=config['AWS_REGION'],
                aws_service='es')

        es = Elasticsearch(**kwargs)
        es._index = config['INDEX_NAME']
        es._settings = config.get('INDEX_SETTINGS', DEFAULT_INDEX_SETTINGS)
        return es
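For reference, a hedged sketch of the configuration dict this factory reads; every key appears in the code above, but the values are illustrative assumptions.

# Illustrative configuration (values assumed); the three AWS keys are optional
# and signed requests are only enabled when all of them are present.
config = {
    'HOST': 'search-example.us-east-1.es.amazonaws.com',
    'PORT': 443,
    'USE_SSL': True,
    'INDEX_NAME': 'myapp',
    'AWS_ACCESS_KEY': 'aws-access-key-id',
    'AWS_SECRET_KEY': 'aws-secret-key',
    'AWS_REGION': 'us-east-1',
}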
Project: dminer | Author: infosecanon
def destroy(self):
        """
        Deletes all elasticsearch history for Alphabay. It will delete all
        indexes matching:

            dminer-alphabay-*

        It will also delete the template for indexes. This template is named:

            dminer-alphabay-template
        """

        es = Elasticsearch([":".join([str(self.host), str(self.port)])])
        self.logger.info("Deleting index: dminer-alphabay-*")
        es.indices.delete("dminer-alphabay-*")
        self.logger.info("Deleting index template: dminer-dreammarket-template")
        es.indices.delete_template("dminer-alphabay-template")
Project: dminer | Author: infosecanon
def destroy(self):
        """
        Deletes all elasticsearch history for Hansa. It will delete all
        indexes matching:

            dminer-hansa-*

        It will also delete the template for indexes. This template is named:

            dminer-hansa-template
        """

        es = Elasticsearch([":".join([str(self.host), str(self.port)])])
        self.logger.info("Deleting index: dminer-hansa-*")
        es.indices.delete("dminer-hansa-*")
        self.logger.info("Deleting index template: dminer-dreammarket-template")
        es.indices.delete_template("dminer-hansa-template")
Project: dminer | Author: infosecanon
def destroy(self):
        """
        Deletes all elasticsearch history for DreamMarket. It will delete all
        indexes matching:

            dminer-dreammarket-*

        It will also delete the template for indexes. This template is named:

            dminer-dreammarket-template
        """

        es = Elasticsearch([":".join([str(self.host), str(self.port)])])
        self.logger.info("Deleting index: dminer-dreammarket-*")
        es.indices.delete("dminer-dreammarket-*")
        self.logger.info("Deleting index template: dminer-dreammarket-template")
        es.indices.delete_template("dminer-dreammarket-template")
Project: knowledge-graph | Author: MixedEmotions
def getKibiRelationConfig(indexName=".kibi", typeName="config" , elasticPort=9220, elasticHost="localhost"):
    es =  Elasticsearch([{'host': elasticHost, 'port': elasticPort}], http_auth=(elasticUsername, elasticPassword))
    mapping = es.search(
                index=indexName,
                doc_type=typeName,
                size=1000,
                request_timeout=1060,
                body={
                'query': {
                    'filtered': {
                      'query': {
                        'match_all': {}
                      }
                    }
                  }
                }
            )
    return mapping['hits']['hits'][0]
    #return mapping[SourceIndexName]["mappings"][SourceTypeName]
Project: airstream | Author: airtasker
def lambda_handler(event, context):
    es = Elasticsearch(os.environ['ELASTICSEARCH_URL'])
    indices = es.indices.get('*')
    for record in event['Records']:
        # Kinesis data is base64 encoded so decode here
        payload = base64.b64decode(record['kinesis']['data'])
        create_object_payload = json.loads(payload)
        doc_type_name = create_object_payload['event_type']
        index_name = create_object_payload['event_payload']['index_name']
        payload_data = create_object_payload['event_payload']['data']

        index_terms = index_name.split('_')
        del index_terms[-1]
        index_prefix = '_'.join(index_terms)
        latest_index_name = max(filter(lambda k: index_prefix in k, indices))
        if latest_index_name != index_name:
            index_name = latest_index_name

        res = es.index(index=index_name, doc_type=doc_type_name, id=str(create_object_payload['object_id']), body=payload_data)
    for conn in es.transport.connection_pool.connections:
        conn.pool.close()

    return 'Successfully processed {} records.'.format(len(event['Records']))
Project: airstream | Author: airtasker
def lambda_handler(event, context):
    es = Elasticsearch(os.environ['ELASTICSEARCH_URL'])
    indices_list = es.indices.get('*')
    for record in event['Records']:
        # Kinesis data is base64 encoded so decode here
        payload = base64.b64decode(record['kinesis']['data'])
        update_object_payload = json.loads(payload)
        doc_type_name = update_object_payload['event_type']
        index_name = update_object_payload['event_payload']['index_name']
        payload_data = update_object_payload['event_payload']['data']

        index_terms = index_name.split('_')
        del index_terms[-1]
        index_prefix = '_'.join(index_terms)
        latest_index_name = max(filter(lambda k: index_prefix in k, indices_list))
        if latest_index_name != index_name:
            index_name = latest_index_name

        res = es.update(index=index_name, doc_type=doc_type_name, id=str(update_object_payload['object_id']), body={'doc': payload_data, 'doc_as_upsert':True})

    for conn in es.transport.connection_pool.connections:
        conn.pool.close()

    return 'Successfully processed {} records.'.format(len(event['Records']))
Project: pytest-elasticsearch | Author: ClearcodeHQ
def elasticsearch(process_fixture_name):
    """
    Create Elasticsearch client fixture.

    :param str process_fixture_name: elasticsearch process fixture name
    """
    @pytest.fixture
    def elasticsearch_fixture(request):
        """Elasticsearch client fixture."""
        process = request.getfixturevalue(process_fixture_name)
        if not process.running():
            process.start()

        hosts = '{0!s}:{1!s}'.format(process.host, process.port)

        client = Elasticsearch(hosts=hosts)

        def drop_indexes():
            client.indices.delete(index='*')

        request.addfinalizer(drop_indexes)

        return client

    return elasticsearch_fixture
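A possible way to wire the factory above into a test suite; the process fixture name and the test body below are assumptions for illustration.

# conftest.py (illustrative): 'elasticsearch_proc' is assumed to be a running
# Elasticsearch process fixture defined elsewhere.
elasticsearch_client = elasticsearch('elasticsearch_proc')

# test_example.py (illustrative)
def test_indexing(elasticsearch_client):
    elasticsearch_client.index(index='test-index', doc_type='doc',
                               id=1, body={'field': 'value'})
    elasticsearch_client.indices.refresh(index='test-index')
    assert elasticsearch_client.count(index='test-index')['count'] == 1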
Project: espandas | Author: dashaub
def test_es():
    """
    Before running other tests, ensure connection to ES is established
    """
    es = Elasticsearch()
    try:
        es.indices.create(INDEX)
        es.indices.delete(INDEX)
        return True
    except RequestError:
        print('Index already exists: skipping tests.')
        return False
    except ConnectionError:
        print('The ElasticSearch backend is not running: skipping tests.')
        return False
    except Exception as e:
        print('An unknown error occurred connecting to ElasticSearch: %s' % e)
        return False
Project: ResearchGate-Analyser | Author: moinfar
def clustering_pubs_page(request):
    if request.GET.get('index_name') is not None and request.GET.get('index_name') != '':
        dci = DocClusteringInfo()
        dci.doc_type = "publication"
        dci.cost = -1
        dci.iter = -1
        dci.k = -1
        dci.index_name = request.GET.get('index_name')
        dci.save()
        if request.GET.get('k') is not None:
            clustering_pubs.tasks.cluster_index.delay(dci.index_name, k=int(request.GET.get('k')))
        else:
            clustering_pubs.tasks.cluster_index.delay(dci.index_name)
        return redirect("/clustering_pubs/status/%s/" % dci.index_name)

    es = Elasticsearch()
    indexes = es.indices.get_mapping()
    return render(request, 'clustering_pubs.html', {'indexes': indexes})
Project: ResearchGate-Analyser | Author: moinfar
def retrieve_dataset(index_name, doc_type, weight={'title': 5, 'abstract': 1}):
    es = Elasticsearch()
    results = es.search(index=index_name, doc_type=doc_type, size=10000)['hits']['hits']
    dataset = {}
    for res in results:
        doc = DocumentInfo(res['_id'])
        term_vectors = es.termvectors(index=index_name, doc_type=doc_type, id=res['_id'], offsets=False,
                                      payloads=False, positions=False, fields='title,abstract',
                                      field_statistics=False)['term_vectors']
        for zone in {'abstract', 'title'}:
            term_vector = term_vectors[zone]['terms']
            for term in term_vector:
                stemmed = stem(term)
                if stemmed.isalpha():
                    if stemmed not in doc.tf:
                        doc.tf[stemmed] = term_vector[term]['term_freq'] * weight[zone]
                    else:
                        doc.tf[stemmed] += term_vector[term]['term_freq'] * weight[zone]
        dataset[res['_id']] = doc
    return dataset
Project: ResearchGate-Analyser | Author: moinfar
def indexing_status_page(request, id):
    es = Elasticsearch()
    crawl_info = CrawlInfo.objects.get(id=id)
    try:
        es.indices.refresh(index="index-%d" % crawl_info.id)
        percentage = int(es.count("index-%d" % crawl_info.id, crawl_info.type).get('count') * 100 /
                         crawl_info.successful_crawls)
        percentage = max(1, percentage)
    except Exception as e:
        percentage = 0

    if request.GET.get('type', 'HTML') == 'JSON':
        result = json.dumps({'status': 'OK', 'percent': percentage},
                            ensure_ascii=False, encoding='utf8')
        return HttpResponse(result, content_type='application/json; charset=utf-8')

    return render(request, 'indexing_status.html', {'percent': percentage})
Project: ahmia-site | Author: ahmia
def get_elasticsearch_object():
    """ Creating an elasticsearch object to query the index """
    try:
        es_servers = settings.ELASTICSEARCH_SERVERS
        es_servers = es_servers if isinstance(es_servers, list) \
            else [es_servers]
    except AttributeError:
        es_servers = ["http://localhost:9200"]#["https://ahmia.fi/esconnection/"]

    try:
        timeout = settings.ELASTICSEARCH_TIMEOUT
    except AttributeError:
        timeout = 60
    es_obj = Elasticsearch(hosts=es_servers,
                           timeout=timeout)
    return es_obj
Project: Image-search-engine | Author: praveenKumar88
def setup_index(request, index_name):
    es = Elasticsearch()
    try:
        es.indices.create(index=index_name)
    except RequestError as e:
        if e.error == u'index_already_exists_exception':
            es.indices.delete(index_name)
        else:
            raise

    def fin():
        try:
            es.indices.delete(index_name)
        except NotFoundError:
            pass

    request.addfinalizer(fin)
Project: mod-elastic-logs | Author: descrepes
def open(self):
        """
        Connect to ES cluster.
        Execute a command to check if connected on master to activate immediate connection to
        the DB because we need to know if DB server is available.
        Update log rotation time to force a log rotation
        """
        logger.info("[elastic-logs] trying to connect to ES Cluster: %s", self.hosts)
        self.es = Elasticsearch(self.hosts.split(','), timeout=int(self.timeout))
        try:
            self.es.cluster.health()
            logger.info("[elastic-logs] connected to the ES Cluster: %s", self.hosts)
            self.is_connected = CONNECTED
            self.next_logs_rotation = time.time()

        except TransportError, exp:
            logger.error("[elastic-logs] Cluster is not available: %s", str(exp))
            self.is_connected = DISCONNECTED
            return False

        return True
Project: PDF_text_extract | Author: theemadnes
def get_test_client(nowait=False, **kwargs):
    # construct kwargs from the environment
    kw = {'timeout': 30}
    if 'TEST_ES_CONNECTION' in os.environ:
        from elasticsearch import connection
        kw['connection_class'] = getattr(connection, os.environ['TEST_ES_CONNECTION'])

    kw.update(kwargs)
    client = Elasticsearch([os.environ.get('TEST_ES_SERVER', {})], **kw)

    # wait for yellow status
    for _ in range(1 if nowait else 100):
        try:
            client.cluster.health(wait_for_status='yellow')
            return client
        except ConnectionError:
            time.sleep(.1)
    else:
        # timeout
        raise SkipTest("Elasticsearch failed to start.")
Project: georef-api | Author: datosgobar
def build_condition(field, value, kind='match', fuzzy=False):
    """Crea una condición para Elasticsearch.

    Args:
        field (str): Campo de la condición.
        value (str): Valor de comparación.
        fuzzy (bool): Bandera para habilitar tolerancia a errores.

    Returns:
        dict: Condición para Elasticsearch.
    """
    if fuzzy and kind == 'match':
        query = {field: {'query': value, 'fuzziness': 1}}
    else:
        query = {field: value}
    return {kind: query}
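A hedged example of plugging a condition built this way into a bool query; the index and field names are illustrative assumptions, not part of the project.

from elasticsearch import Elasticsearch

# Hypothetical usage (index and field names assumed).
condition = build_condition('nombre', 'cordoba', fuzzy=True)
query = {'query': {'bool': {'must': [condition]}}}

es = Elasticsearch()
response = es.search(index='provincias', body=query)
for hit in response['hits']['hits']:
    print(hit['_source'])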
Project: amazonbeat | Author: awormuth
def main():
    parser = argparse.ArgumentParser(
        description="Export the Kibana dashboards together with"
                    " all used visualizations, searches and index pattern")
    parser.add_argument("--url",
                        help="Elasticsearch URL. By default: http://localhost:9200",
                        default="http://localhost:9200")
    parser.add_argument("--regex",
                        help="Regular expression to match all the dashboards to be exported. For example: metricbeat*",
                        required=True)
    parser.add_argument("--kibana",
                        help="Elasticsearch index where to store the Kibana settings. By default: .kibana ",
                        default=".kibana")
    parser.add_argument("--dir", help="Output directory. By default: output",
                        default="output")

    args = parser.parse_args()

    print("Export {} dashboards to {} directory".format(args.regex, args.dir))
    print("Elasticsearch URL: {}".format(args.url))
    print("Elasticsearch index to store Kibana's"
          " dashboards: {}".format(args.kibana))

    es = Elasticsearch(args.url)
    ExportDashboards(es, args.regex, args.kibana, args.dir)
Project: daas | Author: havron
def setUp(self):  # try to index some fixtures in elasticsearch
    self.client = Client()

    fixtureA = {"listing_id":1,"drone": 2, "owner": 2, "description": "please rent myseediestdrone!", "time_posted": "2016-10-24T04:28:48.932Z", "price_per_day": 10.0}
    fixtureB = {"listing_id":2,"drone": 3, "owner": 3, "description": "please rent myforgeddrone!", "time_posted": "2016-10-24T04:28:48.991Z", "price_per_day": 14.0}

    es = Elasticsearch(['es'])
    es.index(index='listing_index', doc_type='listing', id=fixtureA['listing_id'], body=fixtureA)
    es.index(index='listing_index', doc_type='listing', id=fixtureB['listing_id'], body=fixtureB)
    es.indices.refresh(index='listing_index')
    producer.send('new-listings-topic', json.dumps(some_new_listing).encode('utf-8'))

    response = self.client.post(reverse('create-listing'), fixtureA)
    print("test_create_listing POST " + str(response))

    resp = json.loads(response.content.decode('utf8'))
    self.assertEquals(response.status_code, 200)
    print("listing_atts" + str(resp))

  # append number to test to get python to run defs in correct order
Project: kibtool | Author: jpparis-orange
def get_client():
  global client
  if client is not None:
    return client

  client = Elasticsearch([os.environ.get('TEST_ES_SERVER', {})], timeout=300)

  # wait for yellow status
  for _ in range(100):
    time.sleep(.1)
    try:
      client.cluster.health(wait_for_status='yellow')
      return client
    except ConnectionError:
      continue
  else:
    # timeout
    raise SkipTest("Elasticsearch failed to start.")
Project: kge-server | Author: vfrico
def __init__(self, dataset_type, dataset_id):
        """Data Access Object to interact with autocomplete

        The autocomplete is provided by Elasticsearch, and it is not divided
        by datasets, instead it is divided by dataset type.
        """
        # TODO: Generate an index on elasticsearch with allowed fields
        # The entity must be loaded with a dataset

        # Elasticsearch global params
        self.ELASTIC_ENDPOINT = "http://elasticsearch:9200/"
        self.ELASTIC_AUTH = ("elastic", "changeme")

        # Create Elasticsearch object
        self.es = Elasticsearch(self.ELASTIC_ENDPOINT,
                                http_auth=self.ELASTIC_AUTH)
        self.index = "entities"
        self.type = dataset_type
        self.dataset_id = dataset_id
        # Test if index exists, and if not, creates it
        if not self.es.indices.exists(index=self.index):
            self.generate_index(self.index)
Project: fccforensics | Author: RagtagOpen
def __init__(self, endpoint='http://localhost:9200/', verify=True, limit=10000, date=None):
        self.endpoint = endpoint
        self.verify = verify
        self.es = Elasticsearch(self.endpoint)
        self.limit = int(limit)
        self.indexed = 0
        self.date = date
Project: fccforensics | Author: RagtagOpen
def __init__(self, endpoint='http://localhost:9200/', source=None, limit=100):
        self.es = Elasticsearch(endpoint)
        self.last_call = time.time()
        self.source = source
        self.limit = limit
Project: fccforensics | Author: RagtagOpen
def run(self):
        es = Elasticsearch(self.endpoint)
        es.indices.create(index='fcc-comments', body=mappings.MAPPINGS)
        print('created fcc-comments index')
Project: gransk | Author: pcbje
def get_elasticsearch(self):
    """
    Get a connection to the Elasticsearch cluster. Currently only supports a
    single host.

    :returns: ``elasticsearch.Elasticsearch``
    """
    return Elasticsearch(hosts=self.config['es_host'], timeout=30)
Project: gransk | Author: pcbje
def get_elasticsearch_helper(self):
    """
    Get helpers module for Elasticsearch. Used to bulk index documents.

    :returns: package ``elasticsearch.helpers``
    """
    return helpers
Project: tableschema-elasticsearch-py | Author: frictionlessdata
def test_es_instance():
    '''An Elasticsearch instance can be passed to Storage or will be created'''
    storage = Storage()
    assert repr(storage) == "Storage <Elasticsearch([{}])>"

    es = Elasticsearch(['localhost'])
    storage = Storage(es)

    if six.PY2:
        assert repr(storage) == "Storage <Elasticsearch([{u'host': u'localhost'}])>"
    else:
        assert repr(storage) == "Storage <Elasticsearch([{'host': 'localhost'}])>"
Project: tableschema-elasticsearch-py | Author: frictionlessdata
def __init__(self, es=None):
        # Use the passed `es` or create a new Elasticsearch instance
        self.__es = es if es is not None else Elasticsearch()
Project: oio-sds-utils | Author: open-io
def main(_):
  maybe_download_and_extract()
  create_graph()
  b = Beanstalk.from_url("beanstalk://127.0.0.1:6014")
  b.watch("oio-process")
  while True:
    try:
        job_id, data = b.reserve()
    except ResponseError:
        continue
    meta = json.loads(data)
    url = meta["url"]
    print(url)
    if url["path"].split('.')[len(url["path"].split('.'))-1] == 'png':
        b.delete(job_id)
        continue
    s = object_storage.ObjectStorageAPI(url["ns"], "http://127.0.0.1:6006")
    meta, stream = s.object_fetch(url["account"], url["user"], url["path"])
    image = (np.frombuffer("".join(stream), np.uint8))
    #image = np.array(image)[:, :, 0:3]
    result=run_inference_on_image(image)
    print(json.dumps(result))
    s.object_update(url["account"], url["user"], url["path"], result)
    # /!\ Change the ip /!\
    es = Elasticsearch(['http://192.168.99.1:9200'])

    # Retrieve the metadata from the object
    meta, stream = s.object_fetch(url["account"], url["user"], url["path"])
    # Create the index in ElasticSearch if it does not exist
    if not es.indices.exists(url["account"].lower()):
        es.indices.create(index=url["account"].lower())

    # Push the metadatas to Elasticsearch
    res = es.index(index=url["account"].lower(), doc_type=url["user"].lower(), body=meta)
    es.indices.refresh(index=url["account"].lower())
    b.delete(job_id)
Project: oadoi | Author: Impactstory
def set_up_elastic(url):
    if not url:
        url = os.getenv("CROSSREF_ES_URL")
    es = Elasticsearch(url,
                       serializer=JSONSerializerPython2(),
                       retry_on_timeout=True,
                       max_retries=100)
    return es