python判断图片相似度找出相似的图像问题_Python

python判断图片相似度，找出相似的图像

背景

找到一个磁共振数据集用于训练时，需要从两个文件夹中找出相似的图像对。

思路

从 a 文件夹中选定一张图片，遍历 b 文件夹，找出与它最相似的图片；如果相似度超过阈值，则把这一对图像保存下来。

代码

import os
from concurrent.futures import ThreadPoolExecutor, as_completed

import cv2
import numpy as np
from skimage.metrics import structural_similarity as ssim

def load_images_from_folder(folder):
    """
    Load every decodable image in a folder as a grayscale array.

    Entries for which ``cv2.imread`` returns ``None`` (files it cannot
    decode) are skipped silently.

    Args:
        folder (str): Path of the folder that contains the images.

    Returns:
        dict: Maps each file name to the image loaded from it.
    """
    images = {}
    for filename in os.listdir(folder):
        img_path = os.path.join(folder, filename)
        # The flag constant is upper-case in OpenCV's Python bindings.
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if img is not None:
            images[filename] = img
    return images

def crop_center(img, crop_fraction=0.6):
    """
    Crop the central region of an image.

    Only the first two axes (rows, columns) are cropped, so both 2-D
    grayscale arrays and arrays with trailing channel axes are accepted.

    Args:
        img (ndarray): Input image with at least two dimensions.
        crop_fraction (float): Fraction of the height and of the width to keep.

    Returns:
        ndarray: The centered crop, obtained by slicing ``img``.
    """
    # shape[:2] instead of shape so that a trailing channel axis does not
    # break the unpacking.
    height, width = img.shape[:2]
    crop_h = int(height * crop_fraction)
    crop_w = int(width * crop_fraction)
    top = (height - crop_h) // 2
    left = (width - crop_w) // 2
    return img[top:top + crop_h, left:left + crop_w]

def find_best_match(image, image_dict):
    """
    Find the image in a dictionary that is most similar to the input image.

    Both images are center-cropped with ``crop_center`` before comparison,
    and the cropped input is resized to the candidate's cropped size so the
    two arrays passed to ``ssim`` have the same shape.

    Args:
        image (ndarray): Input image.
        image_dict (dict): Candidate images keyed by file name.

    Returns:
        tuple: ``(file name of the best match, its SSIM score)``. When
        ``image_dict`` is empty the result is ``(None, -1)``.
    """
    max_ssim = -1
    best_match = None
    cropped_image = crop_center(image)
    for filename, img in image_dict.items():
        cropped_img = crop_center(img)
        # cv2.resize takes the target size as (width, height), hence the
        # swapped shape indices.
        resized_image = cv2.resize(cropped_image, (cropped_img.shape[1], cropped_img.shape[0]))
        current_ssim = ssim(resized_image, cropped_img)
        if current_ssim > max_ssim:
            max_ssim = current_ssim
            best_match = filename
    return best_match, max_ssim

def skip_black_background_image(image, threshold=0.06):
    """
    Decide whether an image is mostly black background and should be skipped.

    Args:
        image (ndarray): Input image.
        threshold (float): Minimum fraction of non-black (value > 0) pixels
            an image needs in order to be kept.

    Returns:
        bool: True if the fraction of non-black pixels is below
        ``threshold`` (or the image has no pixels at all), otherwise False.
    """
    total_pixels = image.size
    if total_pixels == 0:
        # An empty array has nothing to compare; skipping it also avoids a
        # division by zero below.
        return True
    non_black_pixels = np.count_nonzero(image > 0)
    return bool(non_black_pixels / total_pixels < threshold)

def process_patient_folder(patient_folder, train_folder, output_folder, similarity_threshold, file_index):
    """
    Pair each t1w image of one patient with its most similar t1wce image.

    Images are read from the ``t1w`` and ``t1wce`` sub-folders of
    ``train_folder/patient_folder``. t1w images that are mostly black
    background are skipped. For every remaining t1w image the best t1wce
    match is looked up, and if its SSIM reaches ``similarity_threshold`` the
    pair is written to ``output_folder/patient_folder`` as
    ``<index>_t1w.png`` and ``<index>_t1wce.png``.

    Args:
        patient_folder (str): Name of the patient's folder.
        train_folder (str): Root folder of the training data.
        output_folder (str): Root folder for the saved pairs.
        similarity_threshold (float): Minimum SSIM for a pair to be saved.
        file_index (int): Index used for the first saved pair.

    Returns:
        int: The index following the last saved pair; equal to the input
        ``file_index`` when nothing was saved or an input sub-folder is
        missing.
    """
    patient_path = os.path.join(train_folder, patient_folder)
    t1w_folder = os.path.join(patient_path, 't1w')
    t1wce_folder = os.path.join(patient_path, 't1wce')

    # Both modalities are required; a plain file with one of these names is
    # treated the same as a missing folder.
    if not (os.path.isdir(t1w_folder) and os.path.isdir(t1wce_folder)):
        return file_index

    t1w_images = load_images_from_folder(t1w_folder)
    t1wce_images = load_images_from_folder(t1wce_folder)

    patient_output_folder = os.path.join(output_folder, patient_folder)

    for t1w_filename, t1w_image in t1w_images.items():
        if skip_black_background_image(t1w_image):
            continue

        best_match, max_ssim = find_best_match(t1w_image, t1wce_images)
        # best_match is None when there were no t1wce candidates at all.
        if best_match is None or max_ssim < similarity_threshold:
            continue

        # Created lazily so patients without any saved pair leave no empty
        # folder behind; exist_ok avoids the check-then-create race.
        os.makedirs(patient_output_folder, exist_ok=True)
        print(f'病人: {patient_folder}, t1w: {t1w_filename}, t1wce: {best_match}, ssim: {max_ssim}, 输出文件: {file_index}_t1w.png')
        t1w_output_path = os.path.join(patient_output_folder, f"{file_index}_t1w.png")
        t1wce_output_path = os.path.join(patient_output_folder, f"{file_index}_t1wce.png")
        cv2.imwrite(t1w_output_path, t1w_image)
        cv2.imwrite(t1wce_output_path, t1wce_images[best_match])
        file_index += 1

    return file_index

# Input/output locations and the minimum SSIM for a pair to be saved.
train_folder = './train'
output_folder = 'output_folder'
similarity_threshold = 0.75

os.makedirs(output_folder, exist_ok=True)

# Every patient starts numbering its saved pairs from this index; pairs are
# written to a per-patient sub-folder, so the numbers do not collide.
file_index = 1
patient_folders = [f for f in os.listdir(train_folder) if os.path.isdir(os.path.join(train_folder, f))]

# Process up to six patients concurrently, one task per patient folder.
with ThreadPoolExecutor(max_workers=6) as executor:
    future_to_patient = {
        executor.submit(process_patient_folder, patient_folder, train_folder, output_folder, similarity_threshold, file_index): patient_folder
        for patient_folder in patient_folders
    }
    for future in as_completed(future_to_patient):
        patient_folder = future_to_patient[future]
        try:
            # result() re-raises any exception from the worker. The returned
            # index is per-patient and is intentionally not carried over.
            future.result()
        except Exception as exc:
            print(f'{patient_folder} 生成异常: {exc}')
        else:
            print(f'{patient_folder} 处理完成。')