# File: /home/code/projects/manga-organizer-1/cbz-volume-combiner/cbz_volume_combiner/volume.py
import os
import zipfile
import tempfile
import shutil
from tqdm import tqdm
from .file_utils import fix_missing_files, find_file_by_volume_chapter, has_problematic_characters


def _padded_chapter_label(chapter_str):
    """Zero-pad the integer part of a chapter label to three digits.

    Padding only the part before the first '.' keeps decimal chapters in
    numeric order when names are sorted as text: "9.5" becomes "009.5",
    which sorts after "009" and before "010". Whole-number labels get the
    same result as a plain ``zfill(3)``.
    """
    whole, dot, fraction = chapter_str.partition('.')
    return f"{whole.zfill(3)}{dot}{fraction}"


def _deduplicate_chapters(chapter_infos, verbose=False, extra_verbose=False):
    """Return chapter_infos with a single entry per chapter number.

    Entries are grouped by ``str(chapter['chapter'])``. When no group has
    more than one entry the input list is returned unchanged; otherwise
    ``select_best_chapter`` picks the copy to keep for each duplicated
    chapter. Group order follows first appearance in the input.
    """
    groups = {}
    for chapter in chapter_infos:
        # String keys give exact matching between e.g. 10 and "10".
        groups.setdefault(str(chapter['chapter']), []).append(chapter)

    if all(len(group) == 1 for group in groups.values()):
        return chapter_infos

    if verbose:
        print("Found duplicate chapters, selecting one copy of each:")

    unique_chapters = []
    for chapter_num, group in groups.items():
        if len(group) == 1:
            unique_chapters.append(group[0])
            continue

        if verbose:
            print(f" Chapter {chapter_num} has {len(group)} copies:")
            for position, ch in enumerate(group, start=1):
                print(f" {position}. {os.path.basename(ch['filename'])}")

        selected_chapter = select_best_chapter(group, extra_verbose)
        if verbose:
            print(f" Selected: {os.path.basename(selected_chapter['filename'])}")
        unique_chapters.append(selected_chapter)

    if verbose:
        print(f"De-duplicated chapter list now has {len(unique_chapters)} chapters")
    return unique_chapters


def _extract_chapter(chapter_info, temp_dir, verbose=False, extra_verbose=False):
    """Extract one chapter archive into its own sub-directory of temp_dir.

    Extraction is best-effort: a chapter whose file cannot be found, or
    whose archive cannot be read, is skipped (reported only when
    extra_verbose is set) so the remaining chapters can still be combined.
    If the file is missing, ``chapter_info['filename']`` is updated in
    place when ``find_file_by_volume_chapter`` locates a replacement.
    """
    label = _padded_chapter_label(chapter_info['chapter_str'])
    extract_dir = os.path.join(temp_dir, f"chapter_{label}")
    os.makedirs(extract_dir, exist_ok=True)

    if verbose:
        print(f"Extracting chapter {chapter_info['chapter_str']}")

    # Re-check right before extraction and fall back to a lookup by
    # volume/chapter number if the recorded path is gone.
    if not os.path.exists(chapter_info['filename']):
        directory = os.path.dirname(chapter_info['filename'])
        vol_num = chapter_info['volume']
        chap_num = chapter_info['chapter_str']

        if extra_verbose:
            print(f"File not found: {chapter_info['filename']}")
            print(f"Looking for alternative file with volume {vol_num}, chapter {chap_num}")

        actual_file = find_file_by_volume_chapter(directory, vol_num, chap_num, extra_verbose)
        if not (actual_file and os.path.exists(actual_file)):
            if extra_verbose:
                print(f"Skipping chapter {chapter_info['chapter_str']} - file not found")
            return

        chapter_info['filename'] = actual_file
        if extra_verbose:
            print(f"Found alternative file: {actual_file}")

    if extra_verbose:
        print(f"File: {chapter_info['filename']}")

    try:
        with zipfile.ZipFile(chapter_info['filename'], 'r') as zf:
            file_list = sorted(zf.namelist())

            if extra_verbose:
                print(f" Contains {len(file_list)} files")

            for i, file_name in enumerate(file_list):
                if file_name.endswith('/'):
                    # Directory entries carry no data.
                    continue

                # Flatten to a standardized name; the counter is the entry's
                # position in the sorted listing (directory entries included).
                ext = os.path.splitext(os.path.basename(file_name))[1]
                new_name = f"chapter{label}_{i+1:03d}{ext}"

                try:
                    with zf.open(file_name) as source, \
                            open(os.path.join(extract_dir, new_name), 'wb') as target:
                        shutil.copyfileobj(source, target)
                except Exception as file_error:
                    # Deliberately broad: one bad page must not lose the chapter.
                    if extra_verbose:
                        print(f" ERROR extracting {file_name}: {str(file_error)}")
    except Exception as e:
        # Deliberately broad: one bad archive must not abort the volume.
        if extra_verbose:
            print(f"Error extracting chapter: {e}")


def _write_volume_archive(temp_dir, output_path, extra_verbose=False):
    """Zip every chapter directory under temp_dir into output_path.

    The archive is built at ``output_path + ".tmp"`` and moved into place
    with ``os.replace`` only after it has been written completely, so a
    failure never leaves a truncated volume at output_path (which a later
    non-forced run would treat as already done) and never destroys an
    existing volume when overwriting. Any exception is re-raised after the
    partial file has been removed.
    """
    partial_path = f"{output_path}.tmp"
    try:
        with zipfile.ZipFile(partial_path, 'w') as volume_zip:
            for dir_name in sorted(os.listdir(temp_dir)):
                dir_path = os.path.join(temp_dir, dir_name)
                if not os.path.isdir(dir_path):
                    continue

                page_files = sorted(os.listdir(dir_path))
                if extra_verbose:
                    print(f"Adding directory: {dir_name} ({len(page_files)} files)")

                for index, page in enumerate(page_files):
                    arc_name = os.path.join(dir_name, page)
                    # Only echo the first few entries per chapter.
                    if extra_verbose and index < 5:
                        print(f" - Adding {arc_name}")
                    volume_zip.write(os.path.join(dir_path, page), arc_name)

        os.replace(partial_path, output_path)
    except Exception:
        try:
            os.remove(partial_path)
        except OSError:
            # Nothing to clean up (never created, or already moved).
            pass
        raise


def create_volume_cbz(manga_name, volume_num, chapter_infos, output_dir=None, force=False, verbose=False, extra_verbose=False):
    """Combine multiple chapter CBZ files into a single volume CBZ.

    Args:
        manga_name: Series title used in the output file name.
        volume_num: Integer volume number; formatted with two digits.
        chapter_infos: List of chapter dicts. This function reads the keys
            'filename', 'chapter', 'chapter_str' and 'volume'. The list is
            expected to already be in reading order.
        output_dir: Directory for the volume file (created if needed). When
            falsy, the directory of the first chapter file is used.
        force: Overwrite an existing volume file instead of skipping it.
        verbose: Print progress messages and show the progress bar.
        extra_verbose: Print detailed diagnostics.

    Returns:
        A ``(success, message)`` tuple. ``success`` is True when the volume
        was created or skipped because it already exists; otherwise False
        with ``message`` describing the failure.

    Notes:
        When some chapter files cannot be resolved but at least two remain,
        the function prompts on stdin (``input()``) to ask whether to
        continue with the available chapters.
    """
    if not chapter_infos:
        # Nothing to combine; also avoids indexing an empty list below.
        return False, "No chapters provided"

    # Determine output path
    volume_filename = f"{manga_name} - Volume {volume_num:02d}.cbz"
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
        output_path = os.path.join(output_dir, volume_filename)
    else:
        # Use the directory of the first chapter
        output_path = os.path.join(os.path.dirname(chapter_infos[0]['filename']), volume_filename)

    if verbose:
        print(f"Creating volume: {volume_filename}")
        print(f"Output path: {output_path}")

    # Check if volume already exists
    if os.path.exists(output_path) and not force:
        if verbose:
            print(f"Skipping {volume_filename} (already exists)")
        return True, "Skipped (already exists)"

    try:
        # Keep only one copy of each chapter number.
        chapter_infos = _deduplicate_chapters(chapter_infos, verbose, extra_verbose)

        # Verify all chapter files exist before starting and attempt to fix missing files
        missing_files = [
            info['filename'] for info in chapter_infos
            if not os.path.exists(info['filename'])
        ]

        if missing_files:
            if extra_verbose:
                print(f"Found {len(missing_files)} missing files, attempting to fix:")
                for missing in missing_files:
                    print(f" - {os.path.basename(missing)}")

            # Try to fix missing files by finding alternatives
            search_dir = os.path.dirname(chapter_infos[0]['filename'])
            fixed_chapters, unresolved_chapters = fix_missing_files(chapter_infos, search_dir, extra_verbose)

            if unresolved_chapters:
                if extra_verbose:
                    print(f"Could not resolve {len(unresolved_chapters)} missing files:")
                    for chapter in unresolved_chapters:
                        print(f" - {os.path.basename(chapter['filename'])}")

                missing_message = f"Missing {len(unresolved_chapters)} chapter file(s) after resolution attempts"

                # Only offer to continue if we have at least 2 chapters
                if len(fixed_chapters) < 2:
                    return False, missing_message

                print(f"WARNING: {len(unresolved_chapters)} chapters couldn't be found.")
                print(f"Would you like to continue with the {len(fixed_chapters)} available chapters? (y/n)")
                response = input().strip().lower()
                if response != 'y':
                    return False, missing_message

                print(f"Continuing with {len(fixed_chapters)} available chapters")
                chapter_infos = fixed_chapters
            else:
                # Update chapter_infos with the fixed list
                chapter_infos = fixed_chapters
                if extra_verbose:
                    print("All missing files resolved, proceeding with conversion")

        # Final verification before proceeding
        for chapter_info in chapter_infos:
            if not os.path.exists(chapter_info['filename']):
                if extra_verbose:
                    print(f"Fatal error: File still missing after resolution: {chapter_info['filename']}")
                return False, f"File still missing after resolution: {os.path.basename(chapter_info['filename'])}"

        # Create a temporary directory for extraction
        with tempfile.TemporaryDirectory() as temp_dir:
            # Extract all chapters in order
            for chapter_info in tqdm(chapter_infos, desc=f"Extracting chapters for Volume {volume_num}", disable=not verbose):
                _extract_chapter(chapter_info, temp_dir, verbose, extra_verbose)

            # Create the volume CBZ
            if verbose:
                print(f"Creating volume CBZ: {volume_filename}")

            try:
                _write_volume_archive(temp_dir, output_path, extra_verbose)
            except Exception as e:
                return False, f"Error creating volume ZIP: {str(e)}"

        return True, "Created"

    except Exception as e:
        # Top-level boundary: callers receive failures as (False, message).
        return False, str(e)


def select_best_chapter(chapter_group, extra_verbose=False):
    """
    Choose which copy of a duplicated chapter to keep.

    Only copies whose 'filename' exists on disk are considered, and the
    one with the largest file size wins (the first such copy if several
    share the largest size). If no copy exists on disk, the first entry of
    the group is returned unchanged.

    Side effect: every copy that exists on disk gets a 'filesize' key
    holding its size in bytes.
    """
    candidates = [
        entry for entry in chapter_group
        if os.path.exists(entry['filename'])
    ]

    # Guard: nothing on disk, fall back to the first entry as-is.
    if len(candidates) == 0:
        if extra_verbose:
            print("Warning: None of the duplicate chapters exist on disk")
        return chapter_group[0]

    # Record the size on each candidate so it can be compared and reported.
    for entry in candidates:
        entry['filesize'] = os.path.getsize(entry['filename'])

    # Bigger files are often better quality; max() keeps the first of any tie.
    largest = max(candidates, key=lambda entry: entry['filesize'])

    if extra_verbose:
        print(f"Selected largest file: {os.path.basename(largest['filename'])} ({largest['filesize'] / 1024:.1f} KB)")

    return largest