failover triggered mistakenly (#11536)

* Fix failover switch triggered mistakenly and optimise metrics

* format code

* Fix checkstyle

* Fix unit test
This commit is contained in:
Peter Zhu 2024-03-07 11:40:41 +08:00 committed by GitHub
parent 4397e8d275
commit 2f2fc1b12b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 32 additions and 20 deletions

View File

@ -26,13 +26,17 @@ import com.alibaba.nacos.common.notify.NotifyCenter;
import com.alibaba.nacos.common.spi.NacosServiceLoader; import com.alibaba.nacos.common.spi.NacosServiceLoader;
import com.alibaba.nacos.common.utils.JacksonUtils; import com.alibaba.nacos.common.utils.JacksonUtils;
import com.alibaba.nacos.common.utils.ThreadUtils; import com.alibaba.nacos.common.utils.ThreadUtils;
import io.micrometer.core.instrument.Gauge;
import io.micrometer.core.instrument.Meter;
import io.micrometer.core.instrument.Metrics; import io.micrometer.core.instrument.Metrics;
import io.micrometer.core.instrument.Meter;
import io.micrometer.core.instrument.Tag;
import io.micrometer.core.instrument.ImmutableTag;
import io.micrometer.core.instrument.Gauge;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map; import java.util.Map;
import java.util.HashMap;
import java.util.Collection;
import java.util.List;
import java.util.ArrayList;
import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledThreadPoolExecutor; import java.util.concurrent.ScheduledThreadPoolExecutor;
@ -112,7 +116,7 @@ public class FailoverReactor implements Closeable {
} }
if (failoverMap.size() > 0) { if (failoverMap.size() > 0) {
failoverServiceCntMetrics(failoverMap); failoverServiceCntMetrics();
serviceMap = failoverMap; serviceMap = failoverMap;
} }
@ -150,6 +154,10 @@ public class FailoverReactor implements Closeable {
return failoverSwitchEnable; return failoverSwitchEnable;
} }
/**
 * Check whether failover is effectively active for one service.
 *
 * <p>Failover counts as active only when the switch flag is on and the failover
 * service map currently holds an entry for the service with at least one instance.
 *
 * @param serviceName key used to look the service up in the failover service map
 * @return {@code true} if the switch is enabled and the service has failover instances,
 *         {@code false} otherwise
 */
public boolean isFailoverSwitch(String serviceName) {
    if (!failoverSwitchEnable) {
        return false;
    }
    // Single lookup: a containsKey() followed by get() can observe different states
    // if the map reference is swapped or the entry disappears in between, which would
    // make get() return null and the ipCount() call throw.
    ServiceInfo failoverInfo = serviceMap.get(serviceName);
    return failoverInfo != null && failoverInfo.ipCount() > 0;
}
public ServiceInfo getService(String key) { public ServiceInfo getService(String key) {
ServiceInfo serviceInfo = serviceMap.get(key); ServiceInfo serviceInfo = serviceMap.get(key);
@ -174,15 +182,16 @@ public class FailoverReactor implements Closeable {
NAMING_LOGGER.info("{} do shutdown stop", className); NAMING_LOGGER.info("{} do shutdown stop", className);
} }
private void failoverServiceCntMetrics(Map<String, ServiceInfo> failoverMap) { private void failoverServiceCntMetrics() {
try { try {
for (Map.Entry<String, ServiceInfo> entry : failoverMap.entrySet()) { for (Map.Entry<String, ServiceInfo> entry : serviceMap.entrySet()) {
String serviceName = entry.getKey(); String serviceName = entry.getKey();
Gauge register = Gauge List<Tag> tags = new ArrayList<>();
.builder("nacos_naming_client_failover_instances", failoverMap.get(serviceName).ipCount(), tags.add(new ImmutableTag("service_name", serviceName));
Integer::intValue).tag("service_name", serviceName) if (Metrics.globalRegistry.find("nacos_naming_client_failover_instances").tags(tags).gauge() == null) {
.description("Nacos failover data service count").register(Metrics.globalRegistry); Gauge.builder("nacos_naming_client_failover_instances", () -> serviceMap.get(serviceName).ipCount())
meterMap.put(serviceName, register); .tags(tags).register(Metrics.globalRegistry);
}
} }
} catch (Exception e) { } catch (Exception e) {
NAMING_LOGGER.info("[NA] registerFailoverServiceCnt fail.", e); NAMING_LOGGER.info("[NA] registerFailoverServiceCnt fail.", e);
@ -191,10 +200,13 @@ public class FailoverReactor implements Closeable {
private void failoverServiceCntMetricsClear() { private void failoverServiceCntMetricsClear() {
try { try {
for (Map.Entry<String, Meter> entry : meterMap.entrySet()) { for (Map.Entry<String, ServiceInfo> entry : serviceMap.entrySet()) {
Metrics.globalRegistry.remove(entry.getValue()); Gauge gauge = Metrics.globalRegistry.find("nacos_naming_client_failover_instances")
.tag("service_name", entry.getKey()).gauge();
if (gauge != null) {
Metrics.globalRegistry.remove(gauge);
}
} }
meterMap.clear();
} catch (Exception e) { } catch (Exception e) {
NAMING_LOGGER.info("[NA] registerFailoverServiceCnt fail.", e); NAMING_LOGGER.info("[NA] registerFailoverServiceCnt fail.", e);
} }

View File

@ -143,7 +143,7 @@ public class ServiceInfoHolder implements Closeable {
if (changed) { if (changed) {
NAMING_LOGGER.info("current ips:({}) service: {} -> {}", serviceInfo.ipCount(), serviceInfo.getKey(), NAMING_LOGGER.info("current ips:({}) service: {} -> {}", serviceInfo.ipCount(), serviceInfo.getKey(),
JacksonUtils.toJson(serviceInfo.getHosts())); JacksonUtils.toJson(serviceInfo.getHosts()));
if (!failoverReactor.isFailoverSwitch()) { if (!failoverReactor.isFailoverSwitch(serviceKey)) {
NotifyCenter.publishEvent( NotifyCenter.publishEvent(
new InstancesChangeEvent(notifierEventScope, serviceInfo.getName(), serviceInfo.getGroupName(), new InstancesChangeEvent(notifierEventScope, serviceInfo.getName(), serviceInfo.getGroupName(),
serviceInfo.getClusters(), serviceInfo.getHosts())); serviceInfo.getClusters(), serviceInfo.getHosts()));

View File

@ -135,9 +135,9 @@ public class FailoverReactorTest {
@Test @Test
public void testFailoverServiceCntMetrics() public void testFailoverServiceCntMetrics()
throws NoSuchMethodException, InvocationTargetException, IllegalAccessException { throws NoSuchMethodException, InvocationTargetException, IllegalAccessException {
Method method = FailoverReactor.class.getDeclaredMethod("failoverServiceCntMetrics", Map.class); Method method = FailoverReactor.class.getDeclaredMethod("failoverServiceCntMetrics");
method.setAccessible(true); method.setAccessible(true);
method.invoke(failoverReactor, new Object[1]); method.invoke(failoverReactor);
// No exception // No exception
} }