有4个文件
game.py 五子棋游戏
mod.py 神经网络模型
xl.py 训练的代码
aigame.py 玩家与AI对战的五子棋
game.py
class game:
    """Gomoku (five-in-a-row) board with turn tracking and win detection.

    Cells hold '-' (empty), 'x' or 'o'. 'x' always moves first and the two
    players alternate until one of them lines up five stones.
    """

    def __init__(self, h, w):
        """Create an empty board with ``h`` rows and ``w`` columns."""
        # Number of rows.
        self.h = h
        # Number of columns.
        self.w = w
        # Board grid, indexed as self.l[row][column].
        self.l = [['-' for _ in range(w)] for _ in range(h)]
        # Player to move next: 'x' first, then 'o'.
        self.cur = 'x'
        # Winner ('x' or 'o'); None while nobody has won.
        self.win_user = None

    def check_win(self, y, x):
        """Return True if the stone at row ``y``, column ``x`` completes five in a row."""
        # (dy, dx) steps: vertical, horizontal and the two diagonals.
        directions = [(1, 0), (0, 1), (1, 1), (1, -1)]
        player = self.l[y][x]
        for dy, dx in directions:
            count = 0
            # Any winning line through (y, x) lies within 4 cells on either
            # side, so scanning offsets -4..4 covers every candidate run.
            for i in range(-4, 5):
                ny, nx = y + i * dy, x + i * dx
                if 0 <= ny < self.h and 0 <= nx < self.w and self.l[ny][nx] == player:
                    count += 1
                    if count == 5:
                        return True
                else:
                    # Off-board cell or a different stone breaks the run.
                    count = 0
        return False

    def check(self, y, x):
        """Return True if a stone may be placed at row ``y``, column ``x``.

        Coordinates outside the board are rejected explicitly; otherwise a
        negative index would silently wrap around to the opposite edge.
        """
        if not (0 <= y < self.h and 0 <= x < self.w):
            return False
        return self.l[y][x] == '-' and self.win_user is None

    def __str__(self):
        """Render the board as text with row and column indices."""
        # Width needed to print the largest row / column index.
        row_width = len(str(self.h - 1))
        col_width = len(str(self.w - 1))
        result = []
        # Column-index header.
        result.append(' ' * (row_width + 1) + ' '.join(f'{i:>{col_width}}' for i in range(self.w)))
        # Separator line.
        result.append(' ' * (row_width + 1) + '-' * (col_width * self.w))
        # One line per board row, prefixed with its row index.
        for y, row in enumerate(self.l):
            result.append(f'{y:>{row_width}} ' + ' '.join(f'{cell:>{col_width}}' for cell in row))
        return '\n'.join(result)

    def set(self, y, x):
        """Place the current player's stone at row ``y``, column ``x``.

        Returns True if the move was accepted, False if it was rejected
        (game already won, cell occupied, or coordinates off the board).
        A winning move records the winner and leaves ``cur`` unchanged;
        any other accepted move passes the turn to the opponent.
        """
        if self.win_user or not self.check(y, x):
            return False
        self.l[y][x] = self.cur
        if self.check_win(y, x):
            self.win_user = self.cur
            return True
        self.cur = 'x' if self.cur == 'o' else 'o'
        return True

    def heqi(self):
        """Return True when the board has no empty cell left (draw check)."""
        for y in range(self.h):
            for x in range(self.w):
                if self.l[y][x] == '-':
                    return False
        return True
#玩家自己下
def run_game01():
    """Run an interactive two-player game on a 15x15 board in the terminal.

    Each turn the board is printed and the current player is prompted for a
    move typed as ``row,column``. Malformed or rejected input re-prompts the
    same player. The game stops when somebody wins or the board is full.
    """
    g = game(15, 15)
    # Also stop on a full board; otherwise a draw would prompt forever.
    while not g.win_user and not g.heqi():
        while True:
            # Show the current board before every prompt.
            print(g)
            try:
                y, x = input(g.cur + ':').split(',')
                x = int(x)
                y = int(y)
                if g.set(y, x):
                    break
            except (ValueError, IndexError) as e:
                # Bad format / non-numeric text / index outside the board.
                print(e)
    print(g)
    print('胜利者', g.win_user)
mod.py
import torch
import torch.nn as nn
import torch.optim as optim
from game import game
class mymod(nn.Module):
    """Convolutional network that scores every cell of a board.

    Three 3x3 convolutions (padding 1, so the spatial size is preserved)
    are followed by two fully connected layers. There is no pooling layer.
    """

    def __init__(self, input_channels=1, output_size=15*15, *, board_h=15, board_w=15):
        """Build the layers.

        Args:
            input_channels: number of channels in the input board tensor.
            output_size: number of scores produced per sample.
            board_h: board height the flattened feature size is derived from.
            board_w: board width the flattened feature size is derived from.

        The defaults describe a 15x15 board; for another board size pass
        ``board_h``/``board_w`` together with a matching ``output_size``.
        """
        super().__init__()
        # Feature extraction; padding=1 keeps the height and width unchanged.
        self.conv1 = nn.Conv2d(input_channels, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        # Fully connected head operating on the flattened feature map.
        self.fc1 = nn.Linear(128 * board_h * board_w, 1024)
        self.fc2 = nn.Linear(1024, output_size)

    def forward(self, x):
        """Return raw (un-normalised) scores of shape (batch, output_size)."""
        # Convolution -> ReLU, three times.
        x = torch.relu(self.conv1(x))
        x = torch.relu(self.conv2(x))
        x = torch.relu(self.conv3(x))
        # Flatten everything except the batch dimension.
        x = x.view(x.size(0), -1)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

    def save(self, path):
        """Write the model weights (state dict) to ``path``."""
        torch.save(self.state_dict(), path)

    def load(self, path):
        """Load model weights (state dict) from ``path``."""
        self.load_state_dict(torch.load(path))
#改进一下 output 把有棋子的地方的概率=0避免下这些地方
# 输入game对象和mymod对象,用于得到概率最大的落棋点 (行y, 列x)
def input_qi(g: game, m: mymod):
    """Ask model ``m`` for the best empty cell on board ``g``.

    The board is encoded as a (1, 1, rows, cols) float tensor with 'x' -> 1,
    'o' -> -1 and empty -> 0, then passed through the model. The scores are
    soft-maxed, occupied cells are masked out, and the highest remaining
    cell is chosen.

    Returns:
        ``((row, col), output)`` where ``output`` is the raw model output
        (still attached to the autograd graph).
    """
    stone_value = {'x': 1, 'o': -1}
    encoded = [[stone_value.get(cell, 0) for cell in row] for row in g.l]
    # Indexing with [None, None] adds the batch and channel dimensions.
    board_tensor = torch.tensor(encoded, dtype=torch.float32)[None, None]

    output = m(board_tensor)

    # Normalise the scores and lay them out in board shape.
    scores = torch.softmax(output, dim=-1).view(g.h, g.w).detach().numpy()

    # Occupied cells must never be picked.
    for row_idx, row in enumerate(g.l):
        for col_idx, cell in enumerate(row):
            if cell != '-':
                scores[row_idx, col_idx] = -float('inf')

    # The flat argmax index is split back into (row, column).
    y, x = divmod(scores.argmax(), g.w)
    return (y, x), output
xl.py
import os
import torch
import torch.optim as optim
import torch.nn.functional as f
from mod import mymod, input_qi, game
# Paths of the two weight files: one for the model playing 'x', one for 'o'.
mx = 'mx'
mo = 'mo'
# 加载模型,若文件不存在则初始化
def load_model(model, path):
    """Restore ``model`` from ``path`` when that path exists.

    If the path is missing, the model keeps its freshly initialised weights
    and a notice is printed instead.
    """
    if not os.path.exists(path):
        # Nothing saved yet: keep the default initialisation.
        # (Custom weight initialisation, e.g. model.apply(init_weights),
        # could be added here if needed.)
        print(f"{path} not found, initializing new model.")
        return
    model.load(path)
    print(f"loaded model from {path}")
# Create both models and restore their saved weights when available.
modx = mymod()
load_model(modx, mx)
modo = mymod()
load_model(modo, mo)
# One Adam optimizer per model, sharing the same learning rate.
lr = 0.001
optimizer_x = optim.Adam(modx.parameters(), lr=lr)
optimizer_o = optim.Adam(modo.parameters(), lr=lr)
# 损失函数:根据游戏结果调整损失
def compute_loss(winner: int, player: str, model_output):
    """Return the MSE between ``model_output`` and the game-result target.

    Args:
        winner: 1 if 'x' won, -1 if 'o' won, 0 for a draw.
        player: "x" for the x model; any other value is treated as 'o'.
        model_output: tensor produced by the model for this player.

    The target is 1.0 for a win, 0.5 for a draw and 0.0 for a loss. It is
    built with the same shape as ``model_output`` so the loss never relies
    on implicit broadcasting (which makes ``mse_loss`` emit a size-mismatch
    warning) and always matches the output's dtype and device.
    """
    winning_code = 1 if player == "x" else -1
    if winner == winning_code:
        target_value = 1.0
    elif winner == 0:
        target_value = 0.5
    else:
        target_value = 0.0
    target = torch.full_like(model_output, target_value)
    return f.mse_loss(model_output, target)
# 训练模型的过程
def train_game():
    """Play one self-play game between ``modx`` and ``modo`` and update both.

    The two models alternate moves on a fresh 15x15 board until one side
    wins or the board is full. Each model's loss is computed from the output
    of its last move and the final result, then one optimizer step is taken
    per model.

    Returns:
        ``(loss_x, loss_o)`` as Python floats.
    """
    modx.train()
    modo.train()
    # Use a local name that does not shadow the ``game`` class: assigning to
    # ``game`` here would make it a local and raise UnboundLocalError.
    board = game(15, 15)
    # Clear gradients left over from the previous game.
    optimizer_x.zero_grad()
    optimizer_o.zero_grad()
    # Stop on a win or on a full board (a full board would otherwise loop
    # forever, because no further move can be accepted).
    while not board.win_user and not board.heqi():
        # x moves.
        x_move, x_output = input_qi(board, modx)
        board.set(x_move[0], x_move[1])
        if board.win_user or board.heqi():
            break
        # o moves.
        o_move, o_output = input_qi(board, modo)
        board.set(o_move[0], o_move[1])
    # Result code: 1 = x won, -1 = o won, 0 = draw. The winner is checked
    # first so a win on the very last empty cell is not counted as a draw.
    if board.win_user == 'x':
        winner = 1
    elif board.win_user == 'o':
        winner = -1
    else:
        winner = 0
    # Losses are based on each model's most recent output only.
    loss_x = compute_loss(winner, "x", x_output)
    loss_o = compute_loss(winner, "o", o_output)
    loss_x.backward()
    loss_o.backward()
    optimizer_x.step()
    optimizer_o.step()
    print(board)
    return loss_x.item(), loss_o.item()
# 训练多个回合
def train(num_epochs,n):
    """Run ``num_epochs`` self-play games, saving both models every ``n`` games.

    After each game the two losses are printed. Weights are written to the
    files 'mo' and 'mx' each time ``n`` games have passed since the last save.
    """
    games_since_save = 0
    for epoch_no in range(1, num_epochs + 1):
        loss_x, loss_o = train_game()
        print(f"epoch [{epoch_no}/{num_epochs}], loss x: {loss_x}, loss o: {loss_o}")
        games_since_save += 1
        if games_since_save != n:
            continue
        # Checkpoint both models and restart the counter.
        modo.save('mo')
        modx.save('mx')
        print('saved')
        games_since_save = 0
# Start training: 50000 self-play games, saving the weights every 1000 games.
train(50000,1000)
aigame.py
from game import game
from mod import mymod,input_qi
#玩家下x ai下o
def playx():
    """Play a terminal game where the human is 'x' and the model is 'o'.

    The 'o' weights are loaded from the file 'mo'. The human types moves as
    ``row,column``; malformed or rejected input re-prompts. The game ends
    when somebody wins or the board is full.
    """
    m = mymod()
    m.load('mo')
    g = game(15, 15)
    while True:
        print(g)
        if g.heqi() or g.win_user:
            break
        # Human move: keep asking until a legal move is entered.
        while True:
            try:
                r = input('x:')
                y, x = r.split(',')
                y = int(y)
                x = int(x)
                if g.set(y, x):
                    break
            except (ValueError, IndexError) as e:
                # Bad format / non-numeric text / index outside the board.
                print(e)
        if g.heqi() or g.win_user:
            break
        # Model move. The same board always yields the same suggestion, so
        # retrying a rejected move would loop forever; fail loudly instead.
        (y, x), _ = input_qi(g, m)
        if not g.set(y, x):
            raise RuntimeError(f'model proposed an illegal move: {(y, x)}')
    print(g)
    print('winner', g.win_user)
#玩家下o ai下x
def playo():
    """Play a terminal game where the model is 'x' and the human is 'o'.

    The 'x' weights are loaded from the file 'mx'. The model moves first;
    the human then types moves as ``row,column``, and malformed or rejected
    input re-prompts. The game ends when somebody wins or the board is full.
    """
    m = mymod()
    m.load('mx')
    g = game(15, 15)
    while True:
        if g.heqi() or g.win_user:
            break
        # Model move. The same board always yields the same suggestion, so
        # retrying a rejected move would loop forever; fail loudly instead.
        (y, x), _ = input_qi(g, m)
        if not g.set(y, x):
            raise RuntimeError(f'model proposed an illegal move: {(y, x)}')
        if g.heqi() or g.win_user:
            break
        print(g)
        # Human move: keep asking until a legal move is entered.
        while True:
            try:
                r = input('o:')
                y, x = r.split(',')
                y = int(y)
                x = int(x)
                if g.set(y, x):
                    break
            except (ValueError, IndexError) as e:
                # Bad format / non-numeric text / index outside the board.
                print(e)
    print(g)
    print('winner', g.win_user)
playx()
总结
到此这篇关于用PyTorch训练五子棋AI的文章就介绍到这了。更多相关PyTorch训练五子棋AI内容请搜索代码网以前的文章或继续浏览下面的相关文章,希望大家以后多多支持代码网!
发表评论