Add some metrics and logs (#4649)

* Fix service metadata update failed problem.

* Connect to a randomly chosen server

* Add some metrics and log for service and client

* Add some metrics and log for naming task worker
This commit is contained in:
杨翊 SionYang 2021-01-07 17:04:26 +08:00 committed by GitHub
parent afb1a28135
commit ad1a6d1b65
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 48 additions and 7 deletions

View File

@ -39,6 +39,7 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Properties;
import java.util.Random;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
@ -73,6 +74,9 @@ public class ServerListManager implements ServerListFactory, Closeable {
public ServerListManager(Properties properties) {
initServerAddr(properties);
if (!serverList.isEmpty()) {
currentIndex.set(new Random().nextInt(serverList.size()));
}
}
private void initServerAddr(Properties properties) {

View File

@ -97,7 +97,9 @@ public class ServiceManager {
* @return removed service
*/
public Service removeSingleton(Service service) {
    // Drop the service from its namespace's entry first, but only when that namespace is known.
    // NOTE(review): containsKey() followed by get() is two separate lookups; if another thread can
    // remove the namespace entry in between, get() would return null here — confirm whether
    // namespace entries are ever removed concurrently.
    if (namespaceSingletonMaps.containsKey(service.getNamespace())) {
        namespaceSingletonMaps.get(service.getNamespace()).remove(service);
    }
    // The value handed back is whatever the singleton repository had stored for this service.
    return singletonRepository.remove(service);
}

View File

@ -20,6 +20,8 @@ import com.alibaba.nacos.common.notify.NotifyCenter;
import com.alibaba.nacos.naming.core.v2.event.client.ClientEvent;
import com.alibaba.nacos.naming.core.v2.pojo.InstancePublishInfo;
import com.alibaba.nacos.naming.core.v2.pojo.Service;
import com.alibaba.nacos.naming.misc.Loggers;
import com.alibaba.nacos.naming.monitor.MetricsMonitor;
import com.alibaba.nacos.naming.pojo.Subscriber;
import java.util.Collection;
@ -57,15 +59,22 @@ public abstract class AbstractClient implements Client {
/**
 * Records {@code instancePublishInfo} as the instance this client publishes for {@code service}.
 *
 * <p>The ip count metric is incremented only when {@code publishers} had no previous entry for the
 * service. A {@link ClientEvent.ClientChangedEvent} is published and a log line is written in
 * every case.
 *
 * @param service             service the instance is published under
 * @param instancePublishInfo publish info to store for the service
 * @return always {@code true}
 */
@Override
public boolean addServiceInstance(Service service, InstancePublishInfo instancePublishInfo) {
    // Store exactly once. put() returns the previous mapping, so the null check only detects a
    // newly added service if this is the first and only put() for the call; a preceding put()
    // would make the check always see the value just written and the metric would never grow.
    if (null == publishers.put(service, instancePublishInfo)) {
        MetricsMonitor.getIpCountMonitor().incrementAndGet();
    }
    NotifyCenter.publishEvent(new ClientEvent.ClientChangedEvent(this));
    Loggers.SRV_LOG.info("Client change for service {}, {}", service, getClientId());
    return true;
}
/**
 * Drops the instance this client published for {@code service}.
 *
 * <p>The ip count metric is decremented only when an entry was actually removed. A
 * {@link ClientEvent.ClientChangedEvent} is published and a log line is written in every case.
 *
 * @param service service whose published instance should be removed
 * @return the publish info that was stored for the service, or {@code null} if there was none
 */
@Override
public InstancePublishInfo removeServiceInstance(Service service) {
    final InstancePublishInfo removed = publishers.remove(service);
    if (removed != null) {
        // Only an entry that really existed counts against the ip metric.
        MetricsMonitor.getIpCountMonitor().decrementAndGet();
    }
    NotifyCenter.publishEvent(new ClientEvent.ClientChangedEvent(this));
    Loggers.SRV_LOG.info("Client remove for service {}, {}", service, getClientId());
    return removed;
}

View File

@ -25,6 +25,7 @@ import com.alibaba.nacos.consistency.entity.Response;
import com.alibaba.nacos.consistency.entity.WriteRequest;
import com.alibaba.nacos.consistency.snapshot.SnapshotOperation;
import com.alibaba.nacos.core.distributed.ProtocolManager;
import com.alibaba.nacos.core.utils.Loggers;
import com.alibaba.nacos.naming.core.v2.ServiceManager;
import com.alibaba.nacos.naming.core.v2.pojo.Service;
import com.alibaba.nacos.naming.utils.Constants;
@ -94,6 +95,7 @@ public class ServiceMetadataProcessor extends RequestProcessor4CP {
}
return Response.newBuilder().setSuccess(true).build();
} catch (Exception e) {
Loggers.RAFT.error("apply service metadata error: ", e);
return Response.newBuilder().setSuccess(false).setErrMsg(e.getMessage()).build();
} finally {
readLock.unlock();

View File

@ -42,4 +42,8 @@ public class NamingExecuteTaskDispatcher {
public void dispatchAndExecuteTask(Object dispatchTag, AbstractExecuteTask task) {
    // Pure delegation: both arguments are passed to the execute engine unchanged.
    // NOTE(review): presumably the engine uses dispatchTag to choose a worker — confirm in the engine.
    executeEngine.addTask(dispatchTag, task);
}
/**
 * Get the workers status text reported by the underlying execute engine.
 *
 * @return the string returned by {@code executeEngine.workersStatus()}, unmodified
 */
public String workersStatus() {
    return executeEngine.workersStatus();
}
}

View File

@ -146,6 +146,14 @@ public class MetricsMonitor {
INSTANCE.failedPush.incrementAndGet();
}
/**
 * Increase the instance (ip) count held by the shared {@code INSTANCE} by one.
 */
public static void incrementInstanceCount() {
    INSTANCE.ipCount.incrementAndGet();
}
/**
 * Decrease the instance (ip) count held by the shared {@code INSTANCE} by one.
 */
public static void decrementInstanceCount() {
    INSTANCE.ipCount.decrementAndGet();
}
/**
 * Get the counter used for naming-module disk exceptions.
 *
 * <p>The counter is looked up through {@code Metrics.counter} under the name
 * {@code nacos_exception} with the tag strings {@code module}/{@code naming} and
 * {@code name}/{@code disk}.
 *
 * @return the counter returned by {@code Metrics.counter} for that name and those tags
 */
public static Counter getDiskException() {
    return Metrics.counter("nacos_exception", "module", "naming", "name", "disk");
}

View File

@ -22,6 +22,7 @@ import com.alibaba.nacos.naming.consistency.persistent.raft.RaftPeer;
import com.alibaba.nacos.naming.core.ServiceManager;
import com.alibaba.nacos.naming.misc.GlobalExecutor;
import com.alibaba.nacos.naming.misc.Loggers;
import com.alibaba.nacos.naming.misc.NamingExecuteTaskDispatcher;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
@ -47,7 +48,7 @@ public class PerformanceLoggerThread {
@Autowired
private ClusterVersionJudgement versionJudgement;
// Declared exactly once; a second declaration of the same constant does not compile.
// NOTE(review): presumably the performance-log interval in seconds — confirm against the
// scheduling call in init().
private static final long PERIOD = 1 * 60;
@PostConstruct
public void init() {
@ -72,8 +73,9 @@ public class PerformanceLoggerThread {
*/
@Scheduled(cron = "0/15 * * * * ?")
public void collectMetrics() {
    // The service count gauge is written once, from the v2 ServiceManager singleton. Writing the
    // v1 serviceManager count first would be a dead store, because this set() replaces it.
    MetricsMonitor.getDomCountMonitor().set(com.alibaba.nacos.naming.core.v2.ServiceManager.getInstance().size());
    // The ip count gauge is intentionally not set here: AbstractClient adjusts it incrementally
    // when a client adds or removes a service instance, and overwriting it with a v1 snapshot on
    // every run would discard those adjustments.
    MetricsMonitor.getAvgPushCostMonitor().set(getAvgPushCost());
    metricsRaftLeader();
}
@ -96,11 +98,18 @@ public class PerformanceLoggerThread {
class PerformanceLogTask implements Runnable {
private int logCount = 0;
@Override
public void run() {
try {
int serviceCount = serviceManager.getServiceCount();
int ipCount = serviceManager.getInstanceCount();
logCount %= 10;
if (logCount == 0) {
Loggers.PERFORMANCE_LOG
.info("PERFORMANCE:|serviceCount|ipCount|maxPushCost|avgPushCost|totalPushCount|failPushCount");
}
int serviceCount = com.alibaba.nacos.naming.core.v2.ServiceManager.getInstance().size();
int ipCount = MetricsMonitor.getIpCountMonitor().get();
long maxPushCost = MetricsMonitor.getMaxPushCostMonitor().get();
long avgPushCost = getAvgPushCost();
long totalPushCount = MetricsMonitor.getTotalPushMonitor().longValue();
@ -108,6 +117,9 @@ public class PerformanceLoggerThread {
Loggers.PERFORMANCE_LOG
.info("PERFORMANCE:|{}|{}|{}|{}|{}|{}", serviceCount, ipCount, maxPushCost, avgPushCost,
totalPushCount, failPushCount);
Loggers.PERFORMANCE_LOG
.info("Task worker status: \n" + NamingExecuteTaskDispatcher.getInstance().workersStatus());
logCount++;
MetricsMonitor.getTotalPushCountForAvg().set(0);
MetricsMonitor.getTotalPushCostForAvg().set(0);
MetricsMonitor.getMaxPushCostMonitor().set(-1);