Add support for de-duplicating chapters and selecting the best version
This commit is contained in:
@@ -29,6 +29,55 @@ def create_volume_cbz(manga_name, volume_num, chapter_infos, output_dir=None, fo
|
||||
return True, "Skipped (already exists)"
|
||||
|
||||
try:
|
||||
# De-duplicate chapters (keep only one copy of each chapter number)
|
||||
if len(chapter_infos) > 1:
|
||||
# Group chapters by their chapter number
|
||||
chapter_groups = {}
|
||||
for chapter in chapter_infos:
|
||||
chapter_num = str(chapter['chapter']) # Convert to string for exact matching
|
||||
if chapter_num not in chapter_groups:
|
||||
chapter_groups[chapter_num] = []
|
||||
chapter_groups[chapter_num].append(chapter)
|
||||
|
||||
# Check for duplicates
|
||||
duplicates_found = False
|
||||
for chapter_num, group in chapter_groups.items():
|
||||
if len(group) > 1:
|
||||
duplicates_found = True
|
||||
break
|
||||
|
||||
if duplicates_found:
|
||||
# Create a new de-duplicated chapter list
|
||||
unique_chapters = []
|
||||
|
||||
if verbose:
|
||||
print("Found duplicate chapters, selecting one copy of each:")
|
||||
|
||||
for chapter_num, group in chapter_groups.items():
|
||||
if len(group) > 1:
|
||||
# We have duplicates for this chapter
|
||||
if verbose:
|
||||
print(f" Chapter {chapter_num} has {len(group)} copies:")
|
||||
for i, ch in enumerate(group):
|
||||
print(f" {i+1}. {os.path.basename(ch['filename'])}")
|
||||
|
||||
# Select the best version - prefer certain scan groups or larger files
|
||||
selected_chapter = select_best_chapter(group, extra_verbose)
|
||||
|
||||
if verbose:
|
||||
print(f" Selected: {os.path.basename(selected_chapter['filename'])}")
|
||||
|
||||
unique_chapters.append(selected_chapter)
|
||||
else:
|
||||
# Only one copy, just add it
|
||||
unique_chapters.append(group[0])
|
||||
|
||||
# Replace the original chapter list with the de-duplicated one
|
||||
chapter_infos = unique_chapters
|
||||
|
||||
if verbose:
|
||||
print(f"De-duplicated chapter list now has {len(chapter_infos)} chapters")
|
||||
|
||||
# Verify all chapter files exist before starting and attempt to fix missing files
|
||||
missing_files = []
|
||||
for chapter_info in chapter_infos:
|
||||
@@ -177,4 +226,31 @@ def create_volume_cbz(manga_name, volume_num, chapter_infos, output_dir=None, fo
|
||||
|
||||
return True, "Created"
|
||||
except Exception as e:
|
||||
return False, str(e)
|
||||
return False, str(e)
|
||||
|
||||
def select_best_chapter(chapter_group, extra_verbose=False):
    """
    Select the best chapter from a group of duplicate chapters.

    Strategy:
    1. Among the copies whose file can be sized on disk, pick the largest
       one (bigger files are often better quality). On a tie the copy that
       appears first in ``chapter_group`` wins.
    2. Copies whose file is missing or cannot be read are ignored.
    3. If no copy can be sized, return the first entry and hope for the best.

    Args:
        chapter_group: Non-empty sequence of chapter dicts; each dict must
            have a 'filename' key holding the path of that copy.
        extra_verbose: When true, print which file was selected, or a
            warning when none of the copies exist on disk.

    Returns:
        The selected chapter dict (one of the objects in ``chapter_group``).

    Raises:
        IndexError: If ``chapter_group`` is empty.

    Side effects:
        Every chapter whose file could be sized gets that size (in bytes)
        stored under its 'filesize' key.
    """
    best = None
    for chapter in chapter_group:
        # Size the file in a single step instead of exists() followed by
        # getsize(): a file that vanishes or becomes unreadable in between
        # would otherwise raise and abort the whole volume.
        try:
            size = os.path.getsize(chapter['filename'])
        except (OSError, ValueError):
            # Missing/unreadable file or an invalid path: treat this copy
            # as not existing, exactly like os.path.exists() would.
            continue

        chapter['filesize'] = size

        # Strict '>' keeps the earliest copy when sizes are equal.
        if best is None or size > best['filesize']:
            best = chapter

    if best is None:
        if extra_verbose:
            print("Warning: None of the duplicate chapters exist on disk")
        return chapter_group[0]  # Return the first one and hope for the best

    if extra_verbose:
        print(f"Selected largest file: {os.path.basename(best['filename'])} ({best['filesize'] / 1024:.1f} KB)")

    return best
|
||||
Reference in New Issue
Block a user