Add support for de-duplicating chapters and selecting the best version

This commit is contained in:
Ben
2025-04-08 19:36:39 +00:00
parent bc6f495c63
commit 4a00923d07
2 changed files with 199 additions and 5 deletions

View File

@@ -29,6 +29,55 @@ def create_volume_cbz(manga_name, volume_num, chapter_infos, output_dir=None, fo
return True, "Skipped (already exists)"
try:
# De-duplicate chapters (keep only one copy of each chapter number)
if len(chapter_infos) > 1:
# Group chapters by their chapter number
chapter_groups = {}
for chapter in chapter_infos:
chapter_num = str(chapter['chapter']) # Convert to string for exact matching
if chapter_num not in chapter_groups:
chapter_groups[chapter_num] = []
chapter_groups[chapter_num].append(chapter)
# Check for duplicates
duplicates_found = False
for chapter_num, group in chapter_groups.items():
if len(group) > 1:
duplicates_found = True
break
if duplicates_found:
# Create a new de-duplicated chapter list
unique_chapters = []
if verbose:
print("Found duplicate chapters, selecting one copy of each:")
for chapter_num, group in chapter_groups.items():
if len(group) > 1:
# We have duplicates for this chapter
if verbose:
print(f" Chapter {chapter_num} has {len(group)} copies:")
for i, ch in enumerate(group):
print(f" {i+1}. {os.path.basename(ch['filename'])}")
# Select the best version - prefer copies that exist on disk, then the largest file
selected_chapter = select_best_chapter(group, extra_verbose)
if verbose:
print(f" Selected: {os.path.basename(selected_chapter['filename'])}")
unique_chapters.append(selected_chapter)
else:
# Only one copy, just add it
unique_chapters.append(group[0])
# Replace the original chapter list with the de-duplicated one
chapter_infos = unique_chapters
if verbose:
print(f"De-duplicated chapter list now has {len(chapter_infos)} chapters")
# Verify all chapter files exist before starting and attempt to fix missing files
missing_files = []
for chapter_info in chapter_infos:
@@ -177,4 +226,31 @@ def create_volume_cbz(manga_name, volume_num, chapter_infos, output_dir=None, fo
return True, "Created"
except Exception as e:
return False, str(e)
return False, str(e)
def select_best_chapter(chapter_group, extra_verbose=False):
    """
    Select the best chapter from a group of duplicate chapters.

    Strategy:
    1. Among the copies whose file can be read on disk, pick the largest
       one (likely better quality). Ties go to the copy listed first.
    2. If no copy can be read on disk, return the first entry as-is.

    Args:
        chapter_group: List of chapter dicts, each with a 'filename' key
            holding the path of that copy.
        extra_verbose: When true, print which copy was chosen (or a warning
            if none of the copies exist).

    Returns:
        One of the dicts from ``chapter_group`` (the same object, not a copy).

    Raises:
        IndexError: If ``chapter_group`` is empty.

    Side effects:
        Every copy whose size could be read gets a 'filesize' key holding
        its size in bytes.
    """
    readable_chapters = []
    for chapter in chapter_group:
        # Stat the file once instead of exists()-then-getsize(): a copy that
        # vanishes or becomes unreadable between the two calls is skipped
        # here rather than raising and aborting the whole volume.
        try:
            size = os.path.getsize(chapter['filename'])
        except (OSError, ValueError):
            # ValueError covers paths the OS cannot represent (e.g. embedded
            # NUL), which os.path.exists() also reports as "does not exist".
            continue
        chapter['filesize'] = size
        readable_chapters.append(chapter)

    if not readable_chapters:
        if extra_verbose:
            print("Warning: None of the duplicate chapters exist on disk")
        return chapter_group[0]  # Return the first one and hope for the best

    # Bigger files are often better quality. max() returns the first of
    # several equally large copies, so input order breaks ties.
    best = max(readable_chapters, key=lambda ch: ch['filesize'])

    if extra_verbose:
        print(f"Selected largest file: {os.path.basename(best['filename'])} ({best['filesize'] / 1024:.1f} KB)")

    return best