Python生成指定大小的文件
方法一(速度最快)
def create_file_fast(file_path, size_mb):
    """Quickly create a file of the requested size by writing one byte.

    The file is opened for binary writing (replacing any existing content),
    the position is moved to the last byte of the requested size, and a single
    NUL byte is written there. Only that one byte is written explicitly; the
    gap before it is normally reported as zeros by the operating system.

    Args:
        file_path: Path of the file to create or overwrite.
        size_mb: Desired size in MiB (1 MiB = 1024 * 1024 bytes). Fractional
            values are accepted and truncated to a whole number of bytes.
            Negative sizes are not supported (the seek to a negative offset
            fails).
    """
    size_bytes = int(size_mb * 1024 * 1024)
    with open(file_path, 'wb') as f:
        # Opening with 'wb' already leaves an empty file; for a zero-byte
        # request there is nothing to write and seeking to -1 would raise.
        if size_bytes == 0:
            return
        f.seek(size_bytes - 1)  # position of the final byte
        f.write(b'\0')  # writing here extends the file to size_bytes
# Example: generate a 500 MB file.
# NOTE: the output name says "100mb" although 500 MB is requested.
if __name__ == '__main__':
    create_file_fast('100mb_file.bin', 500)

# Method 2 (medium speed)
import os
import random
def create_random_file(file_path, size_mb):
    """Create a file filled with pseudo-random bytes.

    The data comes from the ``random`` module, so it is reproducible after
    ``random.seed(...)`` and is NOT suitable for security purposes.

    Args:
        file_path: Path of the file to create or overwrite.
        size_mb: Desired size in MiB (1 MiB = 1024 * 1024 bytes). Fractional
            values are truncated to a whole number of bytes; zero or negative
            values produce an empty file.
    """
    remaining = int(size_mb * 1024 * 1024)
    chunk_size = 1024 * 1024  # write at most 1 MiB per call
    with open(file_path, 'wb') as f:
        while remaining > 0:
            chunk = min(chunk_size, remaining)
            # Draw all bits of the chunk in a single call. Generating one
            # byte per getrandbits(8) call costs a Python-level iteration per
            # byte and is far slower than a single bulk request.
            data = random.getrandbits(chunk * 8).to_bytes(chunk, 'little')
            f.write(data)
            remaining -= chunk
# Example: generate a 500 MB file of random bytes.
if __name__ == '__main__':
    create_random_file('random_500mb.bin', 500)

# Method 3 (readable text file - slower)
import string
import random
def create_text_file(file_path, size_mb, line_length=80):
    """Create a text file of random printable ASCII lines.

    Each line holds ``line_length`` random characters (letters, digits,
    punctuation and spaces) followed by a newline. The final line is shortened
    so that the file is exactly the requested size and still ends with a
    newline.

    Args:
        file_path: Path of the file to create or overwrite.
        size_mb: Desired size in MiB (1 MiB = 1024 * 1024 bytes). Fractional
            values are truncated to a whole number of bytes; zero or negative
            values produce an empty file.
        line_length: Number of random characters per line, excluding the
            newline.
    """
    remaining = int(size_mb * 1024 * 1024)
    chars = string.ascii_letters + string.digits + string.punctuation + ' '
    # Every character is ASCII and newline translation is disabled, so one
    # character is exactly one byte on every platform and len(line) can be
    # used as the byte count.
    with open(file_path, 'w', encoding='ascii', newline='\n') as f:
        while remaining > 0:
            line = ''.join(random.choices(chars, k=line_length)) + '\n'
            if len(line) > remaining:
                # Shorten the last line instead of overshooting the target,
                # keeping the terminating newline.
                line = line[:remaining - 1] + '\n'
            f.write(line)
            remaining -= len(line)
# Example: generate a 500 MB text file.
# NOTE: the output name says "10mb" although 500 MB is requested.
if __name__ == '__main__':
    create_text_file('10mb_text.txt', 500)

# Method 4 (memory-mapped generation - very large files)
import mmap
import os
def create_large_file(file_path, size_gb):
    """Create a large file and fill it with random bytes through mmap.

    The file is first created and resized to the target length, then mapped
    into memory and overwritten in 100 MiB slices with ``os.urandom`` data.

    Args:
        file_path: Path of the file to create or overwrite.
        size_gb: Desired size in GiB (1 GiB = 1024 ** 3 bytes). Fractional
            values are truncated to a whole number of bytes; zero produces an
            empty file.
    """
    size_bytes = int(size_gb * 1024 ** 3)
    # Create the file and set its final length up front.
    with open(file_path, 'wb') as f:
        f.truncate(size_bytes)
    if size_bytes == 0:
        # An empty file cannot be memory-mapped, and there is nothing to fill.
        return
    chunk_size = 1024 * 1024 * 100  # fill 100 MiB per step
    # Both the file and the mapping are context managers, so they are closed
    # even if filling fails part-way.
    with open(file_path, 'r+b') as f, mmap.mmap(f.fileno(), 0) as mm:
        for offset in range(0, size_bytes, chunk_size):
            end = min(offset + chunk_size, size_bytes)
            mm[offset:end] = os.urandom(end - offset)
# Example: generate a 5 GB file.
if __name__ == '__main__':
    create_large_file('5gb_file.dat', 5)

# Method 5 (file with a specific repeating pattern)
def create_pattern_file(file_path, size_mb, pattern=b'abcd'):
    """Create a file consisting of a repeated byte pattern (useful for tests).

    The pattern is repeated from the start of the file; if the size is not a
    multiple of the pattern length, the file ends with a partial pattern.

    Args:
        file_path: Path of the file to create or overwrite.
        size_mb: Desired size in MiB (1 MiB = 1024 * 1024 bytes). Fractional
            values are truncated to a whole number of bytes.
        pattern: Non-empty bytes sequence to repeat.

    Raises:
        ValueError: If ``pattern`` is empty or ``size_mb`` is negative.
    """
    size_bytes = int(size_mb * 1024 * 1024)
    if size_bytes < 0:
        raise ValueError("size_mb must not be negative")
    if len(pattern) == 0:
        raise ValueError("pattern must not be empty")

    # Writing the pattern one repetition at a time means millions of tiny
    # write calls for large files. Instead build a buffer of about 1 MiB that
    # holds a whole number of repetitions, so every buffer write starts at
    # pattern offset 0 and the output bytes are unchanged.
    repeats = max(1, (1024 * 1024) // len(pattern))
    block = pattern * repeats
    full_blocks, remainder = divmod(size_bytes, len(block))

    with open(file_path, 'wb') as f:
        for _ in range(full_blocks):
            f.write(block)
        # The tail is a prefix of the buffer, i.e. whole patterns plus an
        # optional partial one.
        if remainder:
            f.write(block[:remainder])
# Example: generate a 500 MB file with a repeating pattern.
# NOTE: the output name says "50mb" although 500 MB is requested.
if __name__ == '__main__':
    create_pattern_file('pattern_50mb.dat', 500, b'testpattern')

# Method 6
def run(file_size):
    """Write a test file of ``file_size`` MB and log the time and throughput.

    The whole-MB part is written by ``int_size_mb``; if the size contains a
    decimal point, the fractional part is appended by ``float_size_mb``.

    Args:
        file_size: Size in MB as a string such as ``"500"`` or ``" 1.5 "``.
            Numbers are accepted too and are converted with ``str()``.
    """
    # Imported locally because the snippet has no module-level imports for
    # these names.
    import logging
    import time

    file_path = "d:/dir/"
    print('file_size', file_size)
    size_text = str(file_size).strip()
    # A "." in the text means there is a fractional part to append.
    has_fraction = len(size_text.split(".")) > 1

    start = time.perf_counter()
    int_size_mb(size_text)
    if has_fraction:
        float_size_mb(size_text)
    print("文件大小{}mb,已存入地址{}".format(file_size, file_path))
    timer = time.perf_counter() - start

    # Very small files can finish within the clock resolution; avoid a
    # division by zero in that case.
    speed = float(size_text) / timer if timer > 0 else float('inf')
    logging.getLogger(__name__).info(
        '写入文件共耗时:' + str(timer) + ',写入速度为:' + str(speed)
    )
def int_size_mb(file_size, file_path="d:/dir/", file_name='test_file'):
    """Write the whole-MB part of ``file_size`` to a new file.

    The file is opened in write mode (replacing existing content) and filled
    with the repeating text ``"01"``, 1024 * 1024 characters per MB.

    Args:
        file_size: Size in MB as a string such as ``"500"`` or ``"1.5"``; only
            the part before the decimal point is used. Numbers are accepted
            too and are converted with ``str()``.
        file_path: Directory that receives the file.
        file_name: Name of the file inside ``file_path``.
    """
    import os

    whole_mb = int(str(file_size).strip().split(".")[0])
    # One MB of "01" text, written in a single call per MB.
    one_mb = "01" * (512 * 1024)
    with open(os.path.join(file_path, file_name), "w") as file:
        for _ in range(whole_mb):
            file.write(one_mb)
def float_size_mb(file_size, file_path="d:/dir/", file_name='test_file'):
    """Append the fractional-MB part of ``file_size`` to the file.

    The file is opened in append mode (created if missing) and extended with
    ``"1"`` characters for the part of the size after the decimal point.

    Args:
        file_size: Size in MB as a string such as ``"1.5"``. Numbers are
            accepted too and are converted with ``str()``.
        file_path: Directory that contains the file.
        file_name: Name of the file inside ``file_path``.
    """
    import os

    size_text = str(file_size).strip()
    # Fractional part of the size, in MB.
    fraction_mb = float(size_text) - int(size_text.split(".")[0])
    # Convert to bytes once and truncate once. Truncating per KiB row
    # (int(1024 * fraction)) would drop up to 1023 bytes from the result.
    extra_bytes = int(fraction_mb * 1024 * 1024)
    with open(os.path.join(file_path, file_name), "a") as file:
        if extra_bytes > 0:
            file.write("1" * extra_bytes)
# Example: generate a 500 MB file.
# The size is passed as text because run() and its helpers call .strip()
# on the argument.
if __name__ == '__main__':
    run("500")
总结
以上为个人经验,希望能给大家一个参考,也希望大家多多支持代码网。
发表评论