目录
介绍
GitHub地址:https://github.com/sandrohanea/whisper.net
Whisper.net: speech to text made simple using Whisper models.
模型下载地址:https://huggingface.co/sandrohanea/whisper.net/tree/main/classic
效果
输出信息
whisper_init_from_file_no_state: loading model from 'ggml-small.bin'
whisper_model_load: loading model
whisper_model_load: n_vocab = 51865
whisper_model_load: n_audio_ctx = 1500
whisper_model_load: n_audio_state = 768
whisper_model_load: n_audio_head = 12
whisper_model_load: n_audio_layer = 12
whisper_model_load: n_text_ctx = 448
whisper_model_load: n_text_state = 768
whisper_model_load: n_text_head = 12
whisper_model_load: n_text_layer = 12
whisper_model_load: n_mels = 80
whisper_model_load: ftype = 1
whisper_model_load: qntvr = 0
whisper_model_load: type = 3
whisper_model_load: mem required = 743.00 mb (+ 16.00 mb per decoder)
whisper_model_load: adding 1608 extra tokens
whisper_model_load: model ctx = 464.68 mb
whisper_model_load: model size = 464.44 mb
whisper_init_state: kv self size = 15.75 mb
whisper_init_state: kv cross size = 52.73 mb
00:00:00->00:00:20: 皇鶴楼,崔昊,西人已成皇鶴去,此地空于皇鶴楼,皇鶴一去不复返,白云千载空悠悠。
00:00:20->00:00:39: 青川莉莉汉阳树,方草七七英五周,日暮相关何处事,燕泊江上世人愁。
项目
代码
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
using Whisper.net;
using static System.Net.Mime.MediaTypeNames;
namespace c_使用whisper.net实现语音转文本
{
    /// <summary>
    /// Main window of the sample: picks a WAV file, runs it through a Whisper.net
    /// processor and appends every recognized segment to <c>txtresult</c>.
    /// </summary>
    /// <remarks>
    /// The controls (<c>button1</c>, <c>button2</c>, <c>txtfilename</c>, <c>txtresult</c>) and
    /// <c>initializecomponent</c> are not declared in this part of the partial class;
    /// their names are used exactly as they appear here.
    /// NOTE(review): <c>whisperfactory</c> and <c>processor</c> are never disposed in the
    /// visible code; confirm whether the other half of this partial class releases them.
    /// </remarks>
    public partial class form1 : Form
    {
        public form1()
        {
            initializecomponent();
        }

        // Filter string handed to the open-file dialog: only *.wav files are listed.
        string filefilter = "*.wav|*.wav";

        // Path of the WAV file to transcribe; empty means "nothing selected".
        string wavfilename = "";

        // Model factory and the processor built from it; both are assigned in form1_load
        // and stay null when loading the model fails.
        WhisperFactory whisperfactory;
        WhisperProcessor processor;

        /// <summary>
        /// Transcribes the currently selected WAV file and appends one line per
        /// recognized segment ("start->end: text") to the console and to <c>txtresult</c>.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private async void button2_click(object sender, EventArgs e)
        {
            // Nothing to do without an input file or without a loaded model.
            if (string.IsNullOrEmpty(wavfilename) || processor is null)
            {
                return;
            }

            try
            {
                // Disabled while a run is in flight so a second click cannot start another one.
                button2.Enabled = false;

                using var filestream = File.OpenRead(wavfilename);
                await foreach (var result in processor.ProcessAsync(filestream))
                {
                    // Build the line once; the same text goes to the console and the UI.
                    var line = $"{result.Start}->{result.End}: {result.Text}\r\n";
                    Console.WriteLine(line);
                    txtresult.Text += line;
                }
            }
            catch (Exception ex)
            {
                // Any failure (missing file, decoding error, ...) is reported to the user.
                MessageBox.Show(ex.Message);
            }
            finally
            {
                button2.Enabled = true;
            }
        }

        /// <summary>
        /// Preselects the default WAV file and loads the "ggml-small.bin" model,
        /// building a processor fixed to the "zh" language.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void form1_load(object sender, EventArgs e)
        {
            // Populate the file box first so it is filled in even if the model fails to load.
            wavfilename = "085黄鹤楼.wav";
            txtfilename.Text = wavfilename;

            try
            {
                whisperfactory = WhisperFactory.FromPath("ggml-small.bin");
                processor = whisperfactory.CreateBuilder()
                    .WithLanguage("zh") // alternative kept from the original: .WithLanguage("auto")
                    .Build();
            }
            catch (Exception ex)
            {
                // Report the failure the same way button2_click does instead of letting
                // the exception escape the Load event, and leave the form in a state
                // where transcription cannot be started.
                whisperfactory?.Dispose();
                whisperfactory = null;
                processor = null;
                MessageBox.Show(ex.Message);
                button2.Enabled = false;
            }
        }

        /// <summary>
        /// Lets the user choose a WAV file; on confirmation clears the previous
        /// result and stores/displays the chosen path.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button1_click(object sender, EventArgs e)
        {
            // The dialog is a disposable component; release it when the handler returns.
            using var ofd = new OpenFileDialog { Filter = filefilter };
            if (ofd.ShowDialog() != DialogResult.OK)
            {
                return;
            }

            txtresult.Text = "";
            wavfilename = ofd.FileName;
            txtfilename.Text = wavfilename;
        }
    }
}
发表评论