Je cherche actuellement à indexer 132 millions de documents sur mon cluster Elasticsearch (ES) hébergé sur AWS EC2 ; j'ai pu en indexer 98 millions en une semaine.

J'ai remarqué que la vitesse d'indexation diminuait progressivement à mesure que la taille de l'index augmentait ; celui-ci fait actuellement 44 Go.

J'ai essayé de suspendre le processus et de le reprendre à partir de certains points, mais la vitesse n'était vraiment pas constante.

Existe-t-il une relation entre la taille de l'index et la vitesse d'indexation des documents ?

J'apprécierais tout conseil sur la façon d'améliorer la vitesse d'indexation dans ce cas, si possible. Merci d'avance à tous.

Paramètres du cluster

{
   "persistent":{
      
   },
   "transient":{
      
   },
   "defaults":{
      "cluster":{
         "routing":{
            "use_adaptive_replica_selection":"false",
            "rebalance":{
               "enable":"all"
            },
            "allocation":{
               "node_concurrent_incoming_recoveries":"2",
               "node_initial_primaries_recoveries":"4",
               "same_shard":{
                  "host":"false"
               },
               "total_shards_per_node":"-1",
               "type":"balanced",
               "disk":{
                  "threshold_enabled":"true",
                  "watermark":{
                     "low":"85%",
                     "flood_stage":"95%",
                     "high":"90%"
                  },
                  "include_relocations":"true",
                  "reroute_interval":"60s"
               },
               "awareness":{
                  "attributes":[
                     
                  ]
               },
               "balance":{
                  "index":"0.55",
                  "threshold":"1.0",
                  "shard":"0.45"
               },
               "enable":"all",
               "node_concurrent_outgoing_recoveries":"2",
               "allow_rebalance":"indices_all_active",
               "cluster_concurrent_rebalance":"2",
               "node_concurrent_recoveries":"2"
            }
         },
         "indices":{
            "tombstones":{
               "size":"500"
            },
            "close":{
               "enable":"true"
            }
         },
         "nodes":{
            "reconnect_interval":"10s"
         },
         "persistent_tasks":{
            "allocation":{
               "enable":"all"
            }
         },
         "blocks":{
            "read_only_allow_delete":"false",
            "read_only":"false"
         },
         "service":{
            "slow_task_logging_threshold":"30s"
         },
         "name":"roseland",
         "info":{
            "update":{
               "interval":"30s",
               "timeout":"15s"
            }
         }
      },
      "no":{
         "model":{
            "state":{
               "persist":"false"
            }
         }
      },
      "logger":{
         "level":"INFO"
      },
      "bootstrap":{
         "memory_lock":"false",
         "system_call_filter":"false",
         "ctrlhandler":"true"
      },
      "processors":"2",
      "ingest":{
         "grok":{
            "watchdog":{
               "max_execution_time":"1s",
               "interval":"1s"
            }
         }
      },
      "network":{
         "host":[
            "_local:ipv4_",
            "_eth0:ipv4_"
         ],
         "tcp":{
            "reuse_address":"true",
            "keep_alive":"true",
            "connect_timeout":"30s",
            "receive_buffer_size":"-1b",
            "no_delay":"true",
            "send_buffer_size":"-1b"
         },
         "bind_host":[
            "_local:ipv4_",
            "_eth0:ipv4_"
         ],
         "server":"true",
         "breaker":{
            "inflight_requests":{
               "limit":"100%",
               "overhead":"1.0"
            }
         },
         "publish_host":[
            "_local:ipv4_",
            "_eth0:ipv4_"
         ]
      },
      "pidfile":"/var/run/elasticsearch/elasticsearch.pid",
      "path":{
         "data":[
            "/data/elasticsearch"
         ],
         "logs":"/var/log/elasticsearch",
         "shared_data":"",
         "home":"/usr/share/elasticsearch",
         "repo":[
            
         ]
      },
      "search":{
         "default_search_timeout":"-1",
         "highlight":{
            "term_vector_multi_value":"true"
         },
         "default_allow_partial_results":"true",
         "max_buckets":"-1",
         "low_level_cancellation":"false",
         "keep_alive_interval":"1m",
         "remote":{
            "node":{
               "attr":""
            },
            "initial_connect_timeout":"30s",
            "connect":"true",
            "connections_per_cluster":"3"
         },
         "default_keep_alive":"5m",
         "max_keep_alive":"24h"
      },
      "security":{
         "manager":{
            "filter_bad_defaults":"true"
         }
      },
      "repositories":{
         "fs":{
            "compress":"false",
            "chunk_size":"9223372036854775807b",
            "location":""
         },
         "url":{
            "supported_protocols":[
               "http",
               "https",
               "ftp",
               "file",
               "jar"
            ],
            "allowed_urls":[
               
            ],
            "url":"http:"
         }
      },
      "action":{
         "auto_create_index":"true",
         "search":{
            "shard_count":{
               "limit":"9223372036854775807"
            }
         },
         "destructive_requires_name":"false",
         "master":{
            "force_local":"false"
         }
      },
      "client":{
         "type":"node",
         "transport":{
            "ignore_cluster_name":"false",
            "nodes_sampler_interval":"5s",
            "sniff":"false",
            "ping_timeout":"5s"
         }
      },
      "xpack":{
         "watcher":{
            "execution":{
               "scroll":{
                  "size":"0",
                  "timeout":""
               },
               "default_throttle_period":"5s"
            },
            "internal":{
               "ops":{
                  "bulk":{
                     "default_timeout":""
                  },
                  "index":{
                     "default_timeout":""
                  },
                  "search":{
                     "default_timeout":""
                  }
               }
            },
            "thread_pool":{
               "queue_size":"1000",
               "size":"10"
            },
            "index":{
               "rest":{
                  "direct_access":""
               }
            },
            "history":{
               "cleaner_service":{
                  "enabled":"true"
               }
            },
            "trigger":{
               "schedule":{
                  "ticker":{
                     "tick_interval":"500ms"
                  }
               }
            },
            "enabled":"true",
            "input":{
               "search":{
                  "default_timeout":""
               }
            },
            "encrypt_sensitive_data":"false",
            "transform":{
               "search":{
                  "default_timeout":""
               }
            },
            "stop":{
               "timeout":"30s"
            },
            "watch":{
               "scroll":{
                  "size":"0"
               }
            },
            "require_manual_start":"false",
            "actions":{
               "bulk":{
                  "default_timeout":""
               },
               "index":{
                  "default_timeout":""
               }
            }
         },
         "license":{
            "self_generated":{
               "type":"basic"
            }
         },
         "logstash":{
            "enabled":"true"
         },
         "notification":{
            "hipchat":{
               "host":"",
               "port":"443",
               "default_account":""
            },
            "pagerduty":{
               "default_account":""
            },
            "email":{
               "default_account":"",
               "html":{
                  "sanitization":{
                     "allow":[
                        "body",
                        "head",
                        "_tables",
                        "_links",
                        "_blocks",
                        "_formatting",
                        "img:embedded"
                     ],
                     "disallow":[
                        
                     ],
                     "enabled":"true"
                  }
               }
            },
            "reporting":{
               "retries":"40",
               "interval":"15s"
            },
            "jira":{
               "default_account":""
            },
            "slack":{
               "default_account":""
            }
         },
         "security":{
            "dls_fls":{
               "enabled":"true"
            },
            "transport":{
               "filter":{
                  "allow":[
                     
                  ],
                  "deny":[
                     
                  ],
                  "enabled":"true"
               },
               "ssl":{
                  "enabled":"false"
               }
            },
            "enabled":"true",
            "filter":{
               "always_allow_bound_address":"true"
            },
            "encryption":{
               "algorithm":"AES/CTR/NoPadding"
            },
            "audit":{
               "outputs":[
                  "logfile"
               ],
               "index":{
                  "bulk_size":"1000",
                  "rollover":"DAILY",
                  "flush_interval":"1s",
                  "events":{
                     "emit_request_body":"false",
                     "include":[
                        "ACCESS_DENIED",
                        "ACCESS_GRANTED",
                        "ANONYMOUS_ACCESS_DENIED",
                        "AUTHENTICATION_FAILED",
                        "REALM_AUTHENTICATION_FAILED",
                        "CONNECTION_DENIED",
                        "CONNECTION_GRANTED",
                        "TAMPERED_REQUEST",
                        "RUN_AS_DENIED",
                        "RUN_AS_GRANTED",
                        "AUTHENTICATION_SUCCESS"
                     ],
                     "exclude":[
                        
                     ]
                  },
                  "queue_max_size":"10000"
               },
               "enabled":"false",
               "logfile":{
                  "events":{
                     "emit_request_body":"false",
                     "include":[
                        "ACCESS_DENIED",
                        "ACCESS_GRANTED",
                        "ANONYMOUS_ACCESS_DENIED",
                        "AUTHENTICATION_FAILED",
                        "CONNECTION_DENIED",
                        "TAMPERED_REQUEST",
                        "RUN_AS_DENIED",
                        "RUN_AS_GRANTED"
                     ],
                     "exclude":[
                        
                     ]
                  },
                  "prefix":{
                     "emit_node_host_name":"false",
                     "emit_node_name":"true",
                     "emit_node_host_address":"false"
                  }
               }
            },
            "authc":{
               "anonymous":{
                  "authz_exception":"true",
                  "roles":[
                     
                  ],
                  "username":"_anonymous"
               },
               "password_hashing":{
                  "algorithm":"bcrypt"
               },
               "run_as":{
                  "enabled":"true"
               },
               "reserved_realm":{
                  "enabled":"true"
               },
               "token":{
                  "delete":{
                     "interval":"30m",
                     "timeout":"-1"
                  },
                  "enabled":"false",
                  "thread_pool":{
                     "queue_size":"1000",
                     "size":"1"
                  },
                  "timeout":"20m"
               }
            },
            "fips_mode":{
               "enabled":"false"
            },
            "encryption_key":{
               "length":"128",
               "algorithm":"AES"
            },
            "http":{
               "filter":{
                  "allow":[
                     
                  ],
                  "deny":[
                     
                  ],
                  "enabled":"true"
               },
               "ssl":{
                  "enabled":"false"
               }
            },
            "automata":{
               "max_determinized_states":"100000"
            },
            "user":null,
            "authz":{
               "store":{
                  "roles":{
                     "field_permissions":{
                        "cache":{
                           "max_size_in_bytes":"104857600"
                        }
                     },
                     "index":{
                        "cache":{
                           "ttl":"20m",
                           "max_size":"10000"
                        }
                     },
                     "cache":{
                        "max_size":"10000"
                     }
                  }
               }
            }
         },
         "http":{
            "default_connection_timeout":"10s",
            "proxy":{
               "host":"",
               "scheme":"",
               "port":"0"
            },
            "default_read_timeout":"10s",
            "max_response_size":"10mb"
         },
         "monitoring":{
            "enabled":"true",
            "collection":{
               "cluster":{
                  "stats":{
                     "timeout":"10s"
                  }
               },
               "node":{
                  "stats":{
                     "timeout":"10s"
                  }
               },
               "indices":[
                  
               ],
               "index":{
                  "stats":{
                     "timeout":"10s"
                  },
                  "recovery":{
                     "active_only":"false",
                     "timeout":"10s"
                  }
               },
               "interval":"10s",
               "enabled":"false",
               "ml":{
                  "job":{
                     "stats":{
                        "timeout":"10s"
                     }
                  }
               }
            },
            "history":{
               "duration":"168h"
            }
         },
         "graph":{
            "enabled":"true"
         },
         "ml":{
            "utility_thread_pool":{
               "queue_size":"500",
               "size":"80"
            },
            "max_machine_memory_percent":"30",
            "max_open_jobs":"20",
            "min_disk_space_off_heap":"5gb",
            "autodetect_process":"true",
            "datafeed_thread_pool":{
               "queue_size":"200",
               "size":"20"
            },
            "node_concurrent_job_allocations":"2",
            "max_model_memory_limit":"0b",
            "enabled":"true",
            "autodetect_thread_pool":{
               "queue_size":"80",
               "size":"80"
            }
         },
         "rollup":{
            "enabled":"true",
            "task_thread_pool":{
               "queue_size":"4",
               "size":"4"
            }
         },
         "sql":{
            "enabled":"true"
         }
      },
      "rest":{
         "action":{
            "multi":{
               "allow_explicit_index":"true"
            }
         }
      },
      "cache":{
         "recycler":{
            "page":{
               "limit":{
                  "heap":"10%"
               },
               "type":"CONCURRENT",
               "weight":{
                  "longs":"1.0",
                  "ints":"1.0",
                  "bytes":"1.0",
                  "objects":"0.1"
               }
            }
         }
      },
      "reindex":{
         "remote":{
            "whitelist":[
               
            ]
         }
      },
      "max":{
         "percent":{
            "date":{
               "errors":"25"
            },
            "outoforder":{
               "errors":"25"
            }
         },
         "anomaly":{
            "records":"500"
         }
      },
      "resource":{
         "reload":{
            "enabled":"true",
            "interval":{
               "low":"60s",
               "high":"5s",
               "medium":"30s"
            }
         }
      },
      "thread_pool":{
         "force_merge":{
            "queue_size":"-1",
            "size":"1"
         },
         "fetch_shard_started":{
            "core":"1",
            "max":"4",
            "keep_alive":"5m"
         },
         "listener":{
            "queue_size":"-1",
            "size":"1"
         },
         "index":{
            "queue_size":"200",
            "size":"2"
         },
         "refresh":{
            "core":"1",
            "max":"1",
            "keep_alive":"5m"
         },
         "generic":{
            "core":"4",
            "max":"128",
            "keep_alive":"30s"
         },
         "warmer":{
            "core":"1",
            "max":"1",
            "keep_alive":"5m"
         },
         "search":{
            "max_queue_size":"1000",
            "queue_size":"1000",
            "size":"4",
            "auto_queue_frame_size":"2000",
            "target_response_time":"1s",
            "min_queue_size":"1000"
         },
         "fetch_shard_store":{
            "core":"1",
            "max":"4",
            "keep_alive":"5m"
         },
         "flush":{
            "core":"1",
            "max":"1",
            "keep_alive":"5m"
         },
         "management":{
            "core":"1",
            "max":"5",
            "keep_alive":"5m"
         },
         "analyze":{
            "queue_size":"16",
            "size":"1"
         },
         "get":{
            "queue_size":"1000",
            "size":"2"
         },
         "bulk":{
            "queue_size":"200",
            "size":"2"
         },
         "estimated_time_interval":"200ms",
         "write":{
            "queue_size":"200",
            "size":"2"
         },
         "snapshot":{
            "core":"1",
            "max":"1",
            "keep_alive":"5m"
         }
      },
      "index":{
         "codec":"default",
         "store":{
            "type":"",
            "fs":{
               "fs_lock":"native"
            },
            "preload":[
               
            ]
         }
      },
      "monitor":{
         "jvm":{
            "gc":{
               "enabled":"true",
               "overhead":{
                  "warn":"50",
                  "debug":"10",
                  "info":"25"
               },
               "refresh_interval":"1s"
            },
            "refresh_interval":"1s"
         },
         "process":{
            "refresh_interval":"1s"
         },
         "os":{
            "refresh_interval":"1s"
         },
         "fs":{
            "refresh_interval":"1s"
         }
      },
      "transport":{
         "tcp":{
            "reuse_address":"true",
            "connect_timeout":"30s",
            "compress":"false",
            "port":"9300-9400",
            "keep_alive":"true",
            "receive_buffer_size":"-1b",
            "send_buffer_size":"-1b"
         },
         "bind_host":[
            
         ],
         "ping_schedule":"-1",
         "connections_per_node":{
            "recovery":"2",
            "state":"1",
            "bulk":"3",
            "reg":"6",
            "ping":"1"
         },
         "tracer":{
            "include":[
               
            ],
            "exclude":[
               "internal:discovery/zen/fd*",
               "cluster:monitor/nodes/liveness"
            ]
         },
         "type":"security4",
         "type.default":"netty4",
         "features":{
            "x-pack":"true"
         },
         "host":[
            
         ],
         "publish_port":"-1",
         "tcp_no_delay":"true",
         "publish_host":[
            
         ],
         "netty":{
            "receive_predictor_size":"64kb",
            "receive_predictor_max":"64kb",
            "worker_count":"4",
            "receive_predictor_min":"64kb",
            "boss_count":"1"
         }
      },
      "script":{
         "allowed_contexts":[
            
         ],
         "max_compilations_rate":"75/5m",
         "cache":{
            "max_size":"100",
            "expire":"0ms"
         },
         "painless":{
            "regex":{
               "enabled":"false"
            }
         },
         "max_size_in_bytes":"65535",
         "allowed_types":[
            
         ]
      },
      "node":{
         "data":"true",
         "enable_lucene_segment_infos_trace":"false",
         "local_storage":"true",
         "max_local_storage_nodes":"1",
         "name":"node01",
         "id":{
            "seed":"0"
         },
         "attr":{
            "xpack":{
               "installed":"true"
            },
            "ml":{
               "machine_memory":"4075884544",
               "max_open_jobs":"20",
               "enabled":"true"
            }
         },
         "portsfile":"false",
         "ingest":"true",
         "master":"true",
         "ml":"true"
      },
      "indices":{
         "cache":{
            "cleanup_interval":"1m"
         },
         "mapping":{
            "dynamic_timeout":"30s"
         },
         "memory":{
            "interval":"5s",
            "max_index_buffer_size":"-1",
            "shard_inactive_time":"5m",
            "index_buffer_size":"10%",
            "min_index_buffer_size":"48mb"
         },
         "breaker":{
            "request":{
               "limit":"60%",
               "type":"memory",
               "overhead":"1.0"
            },
            "total":{
               "limit":"70%"
            },
            "accounting":{
               "limit":"100%",
               "overhead":"1.0"
            },
            "fielddata":{
               "limit":"60%",
               "type":"memory",
               "overhead":"1.03"
            },
            "type":"hierarchy"
         },
         "fielddata":{
            "cache":{
               "size":"-1b"
            }
         },
         "query":{
            "bool":{
               "max_clause_count":"1024"
            },
            "query_string":{
               "analyze_wildcard":"false",
               "allowLeadingWildcard":"true"
            }
         },
         "admin":{
            "filtered_fields":"true"
         },
         "recovery":{
            "recovery_activity_timeout":"1800000ms",
            "retry_delay_network":"5s",
            "internal_action_timeout":"15m",
            "retry_delay_state_sync":"500ms",
            "internal_action_long_timeout":"1800000ms",
            "max_bytes_per_sec":"40mb"
         },
         "requests":{
            "cache":{
               "size":"1%",
               "expire":"0ms"
            }
         },
         "store":{
            "delete":{
               "shard":{
                  "timeout":"30s"
               }
            }
         },
         "analysis":{
            "hunspell":{
               "dictionary":{
                  "ignore_case":"false",
                  "lazy":"false"
               }
            }
         },
         "queries":{
            "cache":{
               "count":"10000",
               "size":"10%",
               "all_segments":"false"
            }
         }
      },
      "plugin":{
         "mandatory":[
            
         ]
      },
      "max_running_jobs":"20",
      "discovery":{
         "type":"zen",
         "zen":{
            "commit_timeout":"30s",
            "no_master_block":"write",
            "join_retry_delay":"100ms",
            "join_retry_attempts":"3",
            "ping":{
               "unicast":{
                  "concurrent_connects":"10",
                  "hosts":[
                     
                  ],
                  "hosts.resolve_timeout":"5s"
               }
            },
            "master_election":{
               "ignore_non_master_pings":"false",
               "wait_for_joins_timeout":"30000ms"
            },
            "send_leave_request":"true",
            "ping_timeout":"3s",
            "join_timeout":"60000ms",
            "publish_diff":{
               "enable":"true"
            },
            "publish":{
               "max_pending_cluster_states":"25"
            },
            "minimum_master_nodes":"-1",
            "hosts_provider":[
               
            ],
            "publish_timeout":"30s",
            "fd":{
               "connect_on_network_disconnect":"false",
               "ping_interval":"1s",
               "ping_retries":"3",
               "register_connection_listener":"true",
               "ping_timeout":"30s"
            },
            "max_pings_from_another_master":"3"
         },
         "initial_state_timeout":"30s"
      },
      "tribe":{
         "name":"",
         "on_conflict":"any",
         "blocks":{
            "metadata":"false",
            "read":{
               "indices":[
                  
               ]
            },
            "write.indices":[
               
            ],
            "write":"false",
            "metadata.indices":[
               
            ]
         }
      },
      "http":{
         "cors":{
            "max-age":"1728000",
            "allow-origin":"",
            "allow-headers":"X-Requested-With,Content-Type,Content-Length",
            "allow-credentials":"false",
            "allow-methods":"OPTIONS,HEAD,GET,POST,PUT,DELETE",
            "enabled":"false"
         },
         "max_chunk_size":"8kb",
         "compression_level":"3",
         "max_initial_line_length":"4kb",
         "type":"security4",
         "pipelining":"true",
         "enabled":"true",
         "type.default":"netty4",
         "content_type":{
            "required":"true"
         },
         "host":[
            
         ],
         "publish_port":"-1",
         "read_timeout":"0ms",
         "max_content_length":"100mb",
         "netty":{
            "receive_predictor_size":"64kb",
            "max_composite_buffer_components":"69905",
            "receive_predictor_max":"64kb",
            "worker_count":"4",
            "receive_predictor_min":"64kb"
         },
         "tcp":{
            "reuse_address":"true",
            "keep_alive":"true",
            "receive_buffer_size":"-1b",
            "send_buffer_size":"-1b"
         },
         "bind_host":[
            
         ],
         "reset_cookies":"false",
         "max_warning_header_count":"-1",
         "max_warning_header_size":"-1b",
         "detailed_errors":{
            "enabled":"true"
         },
         "port":"9200-9300",
         "max_header_size":"8kb",
         "pipelining.max_events":"10000",
         "tcp_no_delay":"true",
         "compression":"true",
         "publish_host":[
            
         ]
      },
      "gateway":{
         "recover_after_master_nodes":"0",
         "expected_nodes":"-1",
         "recover_after_data_nodes":"-1",
         "expected_data_nodes":"-1",
         "recover_after_time":"0ms",
         "expected_master_nodes":"-1",
         "recover_after_nodes":"-1"
      }
   }
}
0
queroga_vqz 29 oct. 2020 à 19:01

1 réponse

Meilleure réponse

Comme @leandrojmp l'a mentionné en commentaire, vous devez fournir beaucoup plus d'informations pour que nous puissions formuler une recommandation spécifique. Pour des conseils généraux sur l'amélioration d'une réindexation ponctuelle (effectuée une seule fois), consultez https://opster.com/blogs/improve-elasticsearch-reindex-performance/ ; et pour les performances d'indexation en continu, suivez https://opster.com/blogs/improve-elasticsearch-indexing-rate/.

1
Martijn Pieters 26 févr. 2021 à 16:55