通过Go channel批量读取数据的示例详解_Golang

引言

在 go 语言中，我们可以利用 channel 作为数据的传输通道，通过定期批量读取 channel 中的数据，并将这些数据批量发送到 kafka 或者进行网络写入。这样可以提高系统的性能，减少单个请求的网络开销。

批量处理的主要逻辑是：从 channel 中接收数据，积累到一定数量或者达到时间限制后，将数据批量处理（例如发送到 kafka 或者写入网络）。

下面我将展示一个从 go channel 中批量读取数据，并批量发送到 kafka 和批量写入网络数据的示例。

1. 批量读取 go channel 的通用逻辑

批量读取 go channel 的通用逻辑可以通过一个定时器和一个缓冲区来实现：

当缓冲区的数量达到预定值时，执行批量操作。
当时间超过某个预定时间间隔时，即使缓冲区未满，也进行批量处理。

package main

import (
	"fmt"
	"time"
)

func batchprocessor(ch <-chan string, batchsize int, flushinterval time.duration) {
	var batch []string
	timer := time.newtimer(flushinterval)

	for {
		select {
		case data := <-ch:
			batch = append(batch, data)
			// 当缓冲区达到批量大小时处理
			if len(batch) >= batchsize {
				fmt.printf("processing batch: %v\n", batch)
				batch = nil
				// 重置定时器
				timer.reset(flushinterval)
			}

		case <-timer.c:
			// 如果达到时间间隔，但 batch 不为空，也进行处理
			if len(batch) > 0 {
				fmt.printf("processing batch on timer: %v\n", batch)
				batch = nil
			}
			// 重置定时器
			timer.reset(flushinterval)
		}
	}
}

func main() {
	datachannel := make(chan string)
	batchsize := 5
	flushinterval := 3 * time.second

	// 启动批量处理协程
	go batchprocessor(datachannel, batchsize, flushinterval)

	// 模拟向 channel 发送数据
	for i := 1; i <= 10; i++ {
		datachannel <- fmt.sprintf("data-%d", i)
		time.sleep(1 * time.second)
	}

	// 让主程序暂停一会，以便查看处理结果
	time.sleep(5 * time.second)
}

上面的代码展示了从 channel 中批量读取数据的基本机制：

缓冲大小：当缓冲区满时触发批量处理。
时间间隔：当到达指定的时间间隔时，即使缓冲区未满，也触发批量处理。

2. 批量发送数据到 kafka

我们可以在批量处理逻辑的基础上，利用 kafka 客户端库实现批量发送消息到 kafka。

使用 github.com/shopify/sarama 是 go 中常用的 kafka 客户端库。首先安装它：

go get github.com/shopify/sarama

然后实现批量发送数据到 kafka 的示例：

package main

import (
	"fmt"
	"log"
	"time"

	"github.com/shopify/sarama"
)

// 初始化 kafka 生产者
func initkafkaproducer(brokers []string) sarama.syncproducer {
	config := sarama.newconfig()
	config.producer.return.successes = true
	producer, err := sarama.newsyncproducer(brokers, config)
	if err != nil {
		log.fatalf("failed to start kafka producer: %v", err)
	}
	return producer
}

// 批量发送消息到 kafka
func sendbatchtokafka(producer sarama.syncproducer, topic string, messages []string) {
	var kafkamessages []*sarama.producermessage
	for _, msg := range messages {
		kafkamessages = append(kafkamessages, &sarama.producermessage{
			topic: topic,
			value: sarama.stringencoder(msg),
		})
	}

	err := producer.sendmessages(kafkamessages)
	if err != nil {
		log.printf("failed to send messages: %v", err)
	} else {
		log.printf("successfully sent batch to kafka: %v", messages)
	}
}

// 批量处理 kafka 消息
func kafkabatchprocessor(producer sarama.syncproducer, topic string, ch <-chan string, batchsize int, flushinterval time.duration) {
	var batch []string
	timer := time.newtimer(flushinterval)

	for {
		select {
		case msg := <-ch:
			batch = append(batch, msg)
			if len(batch) >= batchsize {
				sendbatchtokafka(producer, topic, batch)
				batch = nil
				timer.reset(flushinterval)
			}

		case <-timer.c:
			if len(batch) > 0 {
				sendbatchtokafka(producer, topic, batch)
				batch = nil
			}
			timer.reset(flushinterval)
		}
	}
}

func main() {
	// kafka broker 和 topic 配置
	brokers := []string{"localhost:9092"}
	topic := "test_topic"

	// 初始化 kafka 生产者
	producer := initkafkaproducer(brokers)
	defer producer.close()

	datachannel := make(chan string)
	batchsize := 5
	flushinterval := 3 * time.second

	// 启动 kafka 批量处理协程
	go kafkabatchprocessor(producer, topic, datachannel, batchsize, flushinterval)

	// 模拟向 channel 发送数据
	for i := 1; i <= 10; i++ {
		datachannel <- fmt.sprintf("message-%d", i)
		time.sleep(1 * time.second)
	}

	// 让主程序暂停一会以便查看处理结果
	time.sleep(5 * time.second)
}

在这个示例中：

kafkabatchprocessor 函数批量从 channel 中读取数据，并在批量大小达到或时间间隔到达时，将消息发送到 kafka。
使用了 sarama.syncproducer 来确保消息批量发送成功。

3. 批量写入网络数据

同样的逻辑可以用来批量写入网络数据。比如，将数据批量写入到某个 http api。

这里我们使用 go 的 net/http 来实现批量发送 http 请求：

package main

import (
	"bytes"
	"fmt"
	"log"
	"net/http"
	"time"
)

// 批量发送 http 请求
func sendbatchtoapi(endpoint string, batch []string) {
	// 构造请求体
	var requestbody bytes.buffer
	for _, data := range batch {
		requestbody.writestring(fmt.sprintf("%s\n", data))
	}

	// 发送 http post 请求
	resp, err := http.post(endpoint, "text/plain", &requestbody)
	if err != nil {
		log.printf("failed to send batch: %v", err)
		return
	}
	defer resp.body.close()

	log.printf("successfully sent batch to api: %v", batch)
}

// 批量处理 http 请求
func httpbatchprocessor(endpoint string, ch <-chan string, batchsize int, flushinterval time.duration) {
	var batch []string
	timer := time.newtimer(flushinterval)

	for {
		select {
		case msg := <-ch:
			batch = append(batch, msg)
			if len(batch) >= batchsize {
				sendbatchtoapi(endpoint, batch)
				batch = nil
				timer.reset(flushinterval)
			}

		case <-timer.c:
			if len(batch) > 0 {
				sendbatchtoapi(endpoint, batch)
				batch = nil
			}
			timer.reset(flushinterval)
		}
	}
}

func main() {
	// api endpoint
	apiendpoint := "http://localhost:8080/receive"

	datachannel := make(chan string)
	batchsize := 5
	flushinterval := 3 * time.second

	// 启动 http 批量处理协程
	go httpbatchprocessor(apiendpoint, datachannel, batchsize, flushinterval)

	// 模拟向 channel 发送数据
	for i := 1; i <= 10; i++ {
		datachannel <- fmt.sprintf("data-%d", i)
		time.sleep(1 * time.second)
	}

	// 让主程序暂停一会以便查看处理结果
	time.sleep(5 * time.second)
}