256 lines
12 KiB
Python
256 lines
12 KiB
Python
# File: /home/code/projects/manga-organizer-1/cbz-volume-combiner/cbz_volume_combiner/volume.py
|
|
import os
|
|
import zipfile
|
|
import tempfile
|
|
import shutil
|
|
from tqdm import tqdm
|
|
from .file_utils import fix_missing_files, find_file_by_volume_chapter, has_problematic_characters
|
|
|
|
def _padded_chapter_label(chapter_str):
    """Return chapter_str with its whole-number part zero-padded to three digits.

    Plain ``str.zfill(3)`` pads the string as a whole, so a label such as
    "5.5" (already three characters long) stays unpadded and sorts after
    "010". Padding only the part before the first '.' yields "005.5".
    Labels without a '.' come out exactly as ``str.zfill(3)`` produces them.
    """
    whole, dot, fraction = chapter_str.partition('.')
    return f"{whole.zfill(3)}{dot}{fraction}"


def _dedupe_chapters(chapter_infos, verbose, extra_verbose):
    """Return chapter_infos with at most one entry per chapter number.

    Entries are grouped by ``str(chapter['chapter'])``. When no group holds
    more than one entry the original list object is returned untouched;
    otherwise a new list is built in first-occurrence order, with
    select_best_chapter() choosing the survivor of each duplicated group.
    """
    if len(chapter_infos) <= 1:
        return chapter_infos

    # Group by the string form of the chapter number for exact matching.
    chapter_groups = {}
    for chapter in chapter_infos:
        chapter_groups.setdefault(str(chapter['chapter']), []).append(chapter)

    if all(len(group) == 1 for group in chapter_groups.values()):
        return chapter_infos

    if verbose:
        print("Found duplicate chapters, selecting one copy of each:")

    unique_chapters = []
    for chapter_num, group in chapter_groups.items():
        if len(group) == 1:
            # Only one copy, just add it.
            unique_chapters.append(group[0])
            continue

        if verbose:
            print(f" Chapter {chapter_num} has {len(group)} copies:")
            for i, ch in enumerate(group):
                print(f" {i+1}. {os.path.basename(ch['filename'])}")

        selected_chapter = select_best_chapter(group, extra_verbose)

        if verbose:
            print(f" Selected: {os.path.basename(selected_chapter['filename'])}")

        unique_chapters.append(selected_chapter)

    if verbose:
        print(f"De-duplicated chapter list now has {len(unique_chapters)} chapters")

    return unique_chapters


def _extract_chapter(chapter_info, temp_dir, verbose, extra_verbose):
    """Extract one chapter archive into its own sub-directory of temp_dir.

    Returns the number of archive members written to disk. A chapter whose
    file cannot be located, or whose archive cannot be opened, contributes 0
    and is otherwise skipped so the remaining chapters can still be
    processed. May update ``chapter_info['filename']`` in place when an
    alternative file is found.
    """
    label = _padded_chapter_label(chapter_info['chapter_str'])
    chapter_dir = os.path.join(temp_dir, f"chapter_{label}")
    os.makedirs(chapter_dir, exist_ok=True)

    if verbose:
        print(f"Extracting chapter {chapter_info['chapter_str']}")

    # The file may have disappeared or been renamed since the earlier checks,
    # so look it up again right before extraction.
    if not os.path.exists(chapter_info['filename']):
        directory = os.path.dirname(chapter_info['filename'])
        vol_num = chapter_info['volume']
        chap_num = chapter_info['chapter_str']

        if extra_verbose:
            print(f"File not found: {chapter_info['filename']}")
            print(f"Looking for alternative file with volume {vol_num}, chapter {chap_num}")

        # Find by volume and chapter numbers only.
        actual_file = find_file_by_volume_chapter(directory, vol_num, chap_num, extra_verbose)

        if actual_file and os.path.exists(actual_file):
            chapter_info['filename'] = actual_file
            if extra_verbose:
                print(f"Found alternative file: {actual_file}")
        else:
            if extra_verbose:
                print(f"Skipping chapter {chapter_info['chapter_str']} - file not found")
            return 0

    if extra_verbose:
        print(f"File: {chapter_info['filename']}")

    pages_written = 0
    try:
        with zipfile.ZipFile(chapter_info['filename'], 'r') as zf:
            file_list = sorted(zf.namelist())

            if extra_verbose:
                print(f" Contains {len(file_list)} files")

            for i, file_name in enumerate(file_list):
                if file_name.endswith('/'):  # Skip directories
                    continue

                # Standardized name: chapter label + position in the sorted
                # member list, keeping the original extension.
                ext = os.path.splitext(os.path.basename(file_name))[1]
                new_name = f"chapter{label}_{i+1:03d}{ext}"

                try:
                    with zf.open(file_name) as source, open(os.path.join(chapter_dir, new_name), 'wb') as target:
                        shutil.copyfileobj(source, target)
                    pages_written += 1
                except Exception as file_error:
                    # Best effort: one unreadable member must not abort the chapter.
                    if extra_verbose:
                        print(f" ERROR extracting {file_name}: {str(file_error)}")
    except Exception as e:
        # Best effort: skip this chapter but let the caller continue with others.
        if extra_verbose:
            print(f"Error extracting chapter: {e}")

    return pages_written


def _write_volume(temp_dir, output_path, extra_verbose):
    """Zip every chapter directory under temp_dir into output_path.

    The archive is first written to a sibling ``.partial`` file and moved
    into place with os.replace() only once complete, so a failure never
    leaves a truncated volume behind or destroys an existing one. Any
    exception is re-raised after the partial file has been removed.
    """
    partial_path = output_path + ".partial"
    try:
        with zipfile.ZipFile(partial_path, 'w') as volume_zip:
            # Add all files from all chapters in name order.
            for chapter_dir in sorted(os.listdir(temp_dir)):
                chapter_path = os.path.join(temp_dir, chapter_dir)
                if not os.path.isdir(chapter_path):
                    continue

                chapter_files = sorted(os.listdir(chapter_path))

                if extra_verbose:
                    print(f"Adding directory: {chapter_dir} ({len(chapter_files)} files)")

                for position, file in enumerate(chapter_files):
                    file_path = os.path.join(chapter_path, file)
                    arc_name = os.path.join(chapter_dir, file)

                    # Only echo the first few entries per chapter.
                    if extra_verbose and position < 5:
                        print(f" - Adding {arc_name}")

                    volume_zip.write(file_path, arc_name)

        os.replace(partial_path, output_path)
    except Exception:
        try:
            os.remove(partial_path)
        except OSError:
            # Nothing to clean up (never created or already moved).
            pass
        raise


def create_volume_cbz(manga_name, volume_num, chapter_infos, output_dir=None, force=False, verbose=False, extra_verbose=False):
    """Combine multiple chapter CBZ files into a single volume CBZ.

    Args:
        manga_name: Series name used in the output file name.
        volume_num: Integer volume number (formatted as two digits).
        chapter_infos: Sequence of dicts; this function reads the keys
            'filename', 'chapter', 'chapter_str' and 'volume'. A dict's
            'filename' may be updated in place when an alternative file is
            located at extraction time.
        output_dir: Directory for the volume file (created if needed). When
            omitted, the directory of the first chapter's file is used.
        force: Overwrite an existing volume file instead of skipping.
        verbose: Print progress information and show the progress bar.
        extra_verbose: Print detailed diagnostics.

    Returns:
        A ``(success, message)`` tuple. ``(True, "Skipped (already exists)")``
        is returned when the volume exists and ``force`` is false;
        ``(True, "Created")`` on success; ``(False, reason)`` otherwise.

    Note:
        When some missing chapter files cannot be resolved but at least two
        chapters remain, the function prompts on stdin (y/n) before
        continuing.
    """
    # Without chapters there is no directory to derive the output path from
    # and nothing to archive.
    if not chapter_infos:
        return False, "No chapters provided"

    # Determine output path
    volume_filename = f"{manga_name} - Volume {volume_num:02d}.cbz"

    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
        output_path = os.path.join(output_dir, volume_filename)
    else:
        # Use the directory of the first chapter
        output_path = os.path.join(os.path.dirname(chapter_infos[0]['filename']), volume_filename)

    if verbose:
        print(f"Creating volume: {volume_filename}")
        print(f"Output path: {output_path}")

    # Check if volume already exists
    if os.path.exists(output_path) and not force:
        if verbose:
            print(f"Skipping {volume_filename} (already exists)")
        return True, "Skipped (already exists)"

    try:
        # De-duplicate chapters (keep only one copy of each chapter number)
        chapter_infos = _dedupe_chapters(chapter_infos, verbose, extra_verbose)

        # Verify all chapter files exist before starting and attempt to fix missing files
        missing_files = [
            chapter_info['filename']
            for chapter_info in chapter_infos
            if not os.path.exists(chapter_info['filename'])
        ]

        if missing_files:
            if extra_verbose:
                print(f"Found {len(missing_files)} missing files, attempting to fix:")
                for missing in missing_files:
                    print(f" - {os.path.basename(missing)}")

            # Try to fix missing files by finding alternatives
            chapter_dir = os.path.dirname(chapter_infos[0]['filename'])
            fixed_chapters, unresolved_chapters = fix_missing_files(chapter_infos, chapter_dir, extra_verbose)

            if unresolved_chapters:
                if extra_verbose:
                    print(f"Could not resolve {len(unresolved_chapters)} missing files:")
                    for chapter in unresolved_chapters:
                        print(f" - {os.path.basename(chapter['filename'])}")

                # Only offer to continue if we have at least 2 chapters
                if len(fixed_chapters) < 2:
                    return False, f"Missing {len(unresolved_chapters)} chapter file(s) after resolution attempts"

                print(f"WARNING: {len(unresolved_chapters)} chapters couldn't be found.")
                print(f"Would you like to continue with the {len(fixed_chapters)} available chapters? (y/n)")
                response = input().strip().lower()

                if response != 'y':
                    return False, f"Missing {len(unresolved_chapters)} chapter file(s) after resolution attempts"

                print(f"Continuing with {len(fixed_chapters)} available chapters")
                chapter_infos = fixed_chapters
            else:
                # Update chapter_infos with the fixed list
                chapter_infos = fixed_chapters

                if extra_verbose:
                    print("All missing files resolved, proceeding with conversion")

        # Final verification before proceeding
        for chapter_info in chapter_infos:
            if not os.path.exists(chapter_info['filename']):
                if extra_verbose:
                    print(f"Fatal error: File still missing after resolution: {chapter_info['filename']}")
                return False, f"File still missing after resolution: {os.path.basename(chapter_info['filename'])}"

        # Create a temporary directory for extraction
        with tempfile.TemporaryDirectory() as temp_dir:
            # Extract all chapters in order
            pages_extracted = 0
            for chapter_info in tqdm(chapter_infos, desc=f"Extracting chapters for Volume {volume_num}", disable=not verbose):
                pages_extracted += _extract_chapter(chapter_info, temp_dir, verbose, extra_verbose)

            # Do not report success for an archive that would contain nothing.
            if pages_extracted == 0:
                return False, "No pages could be extracted from any chapter"

            # Create the volume CBZ
            if verbose:
                print(f"Creating volume CBZ: {volume_filename}")

            try:
                _write_volume(temp_dir, output_path, extra_verbose)
            except Exception as e:
                return False, f"Error creating volume ZIP: {str(e)}"

        return True, "Created"

    except Exception as e:
        return False, str(e)
|
|
|
|
def select_best_chapter(chapter_group, extra_verbose=False):
    """
    Choose which copy to keep when several files claim the same chapter.

    Copies whose file is absent from disk are ignored. Every remaining copy
    gets its on-disk size stored under the 'filesize' key of its dict, and
    the largest one wins (bigger files are often better quality); among
    equally large copies the one listed first in chapter_group is kept.
    When no copy exists on disk, the first entry of chapter_group is
    returned as a last resort.
    """
    on_disk = []
    for candidate in chapter_group:
        if os.path.exists(candidate['filename']):
            on_disk.append(candidate)

    if len(on_disk) == 0:
        if extra_verbose:
            print("Warning: None of the duplicate chapters exist on disk")
        # Nothing usable on disk; fall back to the first entry.
        return chapter_group[0]

    # Record the size of every surviving copy on its dict.
    for candidate in on_disk:
        candidate['filesize'] = os.path.getsize(candidate['filename'])

    # max() keeps the earliest entry on ties, matching a stable descending sort.
    winner = max(on_disk, key=lambda candidate: candidate['filesize'])

    if extra_verbose:
        print(f"Selected largest file: {os.path.basename(winner['filename'])} ({winner['filesize'] / 1024:.1f} KB)")

    return winner