From 9fa11aea726926f52d9bea94b8e829ed9ede2d55 Mon Sep 17 00:00:00 2001 From: Ben Date: Fri, 21 Mar 2025 23:04:35 +0000 Subject: [PATCH] Init CBZ Volume Combiner v0.2.0 --- .gitignore | 32 +++ README.md | 65 +++++ bin/cbz-volume-combiner | 129 +++++++++ cbz_volume_combiner/__init__.py | 7 + cbz_volume_combiner/combine.py | 433 ++++++++++++++++++++++++++++++ cbz_volume_combiner/core.py | 31 +++ cbz_volume_combiner/file_utils.py | 125 +++++++++ cbz_volume_combiner/parsing.py | 71 +++++ cbz_volume_combiner/volume.py | 159 +++++++++++ requirements.txt | 1 + setup.py | 23 ++ 11 files changed, 1076 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100755 bin/cbz-volume-combiner create mode 100644 cbz_volume_combiner/__init__.py create mode 100644 cbz_volume_combiner/combine.py create mode 100644 cbz_volume_combiner/core.py create mode 100644 cbz_volume_combiner/file_utils.py create mode 100644 cbz_volume_combiner/parsing.py create mode 100644 cbz_volume_combiner/volume.py create mode 100644 requirements.txt create mode 100644 setup.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..70968e2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,32 @@ +# Python bytecode files +__pycache__/ +*.py[cod] +*$py.class + +# Distribution / packaging +dist/ +build/ +*.egg-info/ +*.egg +pip-wheel-metadata/ + +# Virtual environments +venv/ +.venv/ +env/ +ENV/ + +# Editor specific files +.vscode/ +.idea/ +*.swp +*.swo + +# OS specific files +.DS_Store +Thumbs.db + +# Logs and temporary files +*.log +tmp/ +temp/ diff --git a/README.md b/README.md new file mode 100644 index 0000000..50d4c3c --- /dev/null +++ b/README.md @@ -0,0 +1,65 @@ +# README.md for CBZ Volume Combiner + +```markdown +# CBZ Volume Combiner + +A tool for combining individual CBZ comic chapters into complete volume files. 
+ +## Features + +- Combines multiple chapter CBZ files into single volume CBZ files +- Preserves chapter and page order +- Handles file naming and encoding issues +- Works with manga and comics stored in CBZ format + +## Installation + +```bash +# Clone the repository +git clone https://github.com/b3nw/cbz-volume-combiner.git +cd cbz-volume-combiner + +# Install the package +pip install -e . +``` + +## Usage + +```bash +# Basic usage +cbz-volume-combiner /path/to/manga + +# Recursive search +cbz-volume-combiner -r /path/to/manga + +# With verbose output +cbz-volume-combiner -v /path/to/manga + +# For detailed debugging information +cbz-volume-combiner -vv /path/to/manga + +# Output to a specific directory +cbz-volume-combiner -r -o /path/to/output /path/to/manga + +# Force recreation of volumes that already exist +cbz-volume-combiner -r -f /path/to/manga + +# Set minimum number of chapters required to create a volume +cbz-volume-combiner -r -m 3 /path/to/manga +``` + +## Expected File Naming Format + +The script works best with files named in this format: +``` +Manga Name v01 c001 Chapter Title [Group Name].cbz +``` + +Where: +- "v01" indicates volume 1 +- "c001" indicates chapter 1 +- Chapter title and scanlation group are optional + +## License + +MIT \ No newline at end of file diff --git a/bin/cbz-volume-combiner b/bin/cbz-volume-combiner new file mode 100755 index 0000000..138990a --- /dev/null +++ b/bin/cbz-volume-combiner @@ -0,0 +1,129 @@ +#!/usr/bin/env python +# File: /home/code/projects/manga-organizer-1/cbz-volume-combiner/bin/cbz-volume-combiner + +import os +import sys +import argparse +from cbz_volume_combiner.file_utils import find_cbz_files +from cbz_volume_combiner.core import organize_by_volume +from cbz_volume_combiner.volume import create_volume_cbz + +def main(): + parser = argparse.ArgumentParser(description='Combine individual CBZ chapters into volume CBZ files') + parser.add_argument('folder', help='Folder containing CBZ chapter files') + 
parser.add_argument('-r', '--recursive', action='store_true', help='Search for CBZ files recursively') + parser.add_argument('-o', '--output', help='Output folder for volume CBZ files (defaults to same location as chapters)') + parser.add_argument('-f', '--force', action='store_true', help='Force creation even if volume CBZ already exists') + parser.add_argument('-v', '--verbose', action='store_true', help='Show detailed progress') + parser.add_argument('-vv', '--extra-verbose', action='store_true', help='Show extremely detailed debugging information') + parser.add_argument('-m', '--min-chapters', type=int, default=2, + help='Minimum number of chapters required to create a volume (default: 2)') + + args = parser.parse_args() + + # If extra-verbose is enabled, automatically enable verbose too + if args.extra_verbose: + args.verbose = True + + if not os.path.isdir(args.folder): + print(f"Error: '{args.folder}' is not a valid directory") + return 1 + + cbz_files = find_cbz_files(args.folder, args.recursive, args.extra_verbose) + + if not cbz_files: + print(f"No CBZ files found in '{args.folder}'") + return 0 + + print(f"Found {len(cbz_files)} CBZ file(s)") + + # Organize files by manga and volume + volumes = organize_by_volume(cbz_files, args.extra_verbose) + + total_manga = len(volumes) + total_volumes = sum(len(volumes[manga]) for manga in volumes) + + print(f"Found {total_manga} manga series with {total_volumes} volume(s) to process") + + # Print detailed manga and volume information in extra verbose mode + if args.extra_verbose: + print("\nDetailed manga and volume breakdown:") + for manga_name in volumes: + try: + first_volume = min(volumes[manga_name].keys()) + manga_display_name = volumes[manga_name][first_volume][0]['manga_name'] + print(f"\n{manga_display_name}:") + for volume_num in sorted(volumes[manga_name].keys()): + chapters = volumes[manga_name][volume_num] + print(f" Volume {volume_num}: {len(chapters)} chapters") + if args.extra_verbose: + for chapter 
in chapters: + print(f" - Chapter {chapter['chapter_str']}: {os.path.basename(chapter['filename'])}") + print(f" Exists: {os.path.exists(chapter['filename'])}") + except (ValueError, IndexError): + pass + + success_count = 0 + skip_count = 0 + fail_count = 0 + ignored_count = 0 + + # Process each manga and volume + for manga_name in volumes: + # Get a proper display name from the first volume's first chapter + try: + first_volume = min(volumes[manga_name].keys()) + manga_display_name = volumes[manga_name][first_volume][0]['manga_name'] + except (ValueError, IndexError, KeyError): + # Fallback if we can't get proper name + manga_display_name = manga_name + + if args.verbose: + print(f"\nProcessing manga: {manga_display_name}") + + for volume_num in sorted(volumes[manga_name].keys()): + chapters = volumes[manga_name][volume_num] + + # Skip volumes with too few chapters + if len(chapters) < args.min_chapters: + if args.verbose: + print(f"Skipping Volume {volume_num} - only has {len(chapters)} chapter(s) (minimum is {args.min_chapters})") + ignored_count += 1 + continue + + # List all chapters for debugging + if args.extra_verbose: + print(f"\nChapters for {manga_display_name} Volume {volume_num}:") + for chapter in chapters: + print(f" - Chapter {chapter['chapter_str']}: {os.path.basename(chapter['filename'])}") + print(f" Exists: {os.path.exists(chapter['filename'])}") + + success, message = create_volume_cbz( + manga_display_name, + volume_num, + chapters, + args.output, + args.force, + args.verbose, + args.extra_verbose + ) + + if success: + if message == "Skipped (already exists)": + skip_count += 1 + else: + success_count += 1 + else: + fail_count += 1 + print(f"Error creating Volume {volume_num} for {manga_display_name}: {message}") + + print(f"\nVolume creation complete:") + print(f" - {success_count} volumes created successfully") + print(f" - {skip_count} volumes skipped (already exist)") + print(f" - {ignored_count} volumes ignored (too few chapters)") + 
print(f" - {fail_count} volumes failed") + + return 0 + +if __name__ == '__main__': + sys.exit(main()) \ No newline at end of file diff --git a/cbz_volume_combiner/__init__.py b/cbz_volume_combiner/__init__.py new file mode 100644 index 0000000..b6dd096 --- /dev/null +++ b/cbz_volume_combiner/__init__.py @@ -0,0 +1,7 @@ +# File: /home/code/projects/manga-organizer-1/cbz-volume-combiner/cbz_volume_combiner/__init__.py +from .parsing import parse_manga_filename, normalize_filename +from .file_utils import find_cbz_files, fix_missing_files +from .core import organize_by_volume +from .volume import create_volume_cbz + +__version__ = "0.2.0" \ No newline at end of file diff --git a/cbz_volume_combiner/combine.py b/cbz_volume_combiner/combine.py new file mode 100644 index 0000000..1913053 --- /dev/null +++ b/cbz_volume_combiner/combine.py @@ -0,0 +1,433 @@ +import os +import re +import zipfile +import tempfile +import shutil +import argparse +from tqdm import tqdm +from collections import defaultdict +import unicodedata + +def normalize_filename(filename): + """Normalize a filename to handle encoding issues and special characters.""" + # Replace common problematic characters + replacements = { + '?': "'", # Replace question marks that might be incorrectly encoded apostrophes + '?': "'", # Another possible encoding of apostrophe + '?': '"', # Possible encoding of double quote + '?': '-', # Possible encoding of dash + '?': ' ' # Possible encoding of space + } + + for bad_char, good_char in replacements.items(): + filename = filename.replace(bad_char, good_char) + + return filename + +def parse_manga_filename(filename): + """Extract volume, chapter and title information from a manga filename.""" + # Pattern to match: manga_name v## c### [optional title] [optional group] + base_filename = os.path.basename(filename) + + # Try to normalize the filename to handle encoding issues + normalized_filename = normalize_filename(base_filename) + + # Use a more flexible pattern to 
handle apostrophes and other special characters + pattern = r'(.*?)\s+v(\d+)\s+c(\d+[.\d-]*)\s+(.*)\.cbz$' + match = re.match(pattern, normalized_filename) + + if match: + manga_name = match.group(1).strip() + volume = int(match.group(2)) + chapter_str = match.group(3) + rest = match.group(4) + + # Try to extract title and group if available + group_match = re.search(r'\[(.*?)\]$', rest) + if group_match: + group = group_match.group(1) + title = rest[:rest.rfind('[')-1].strip() + else: + group = "" + title = rest.strip() + + # Handle chapter numbers like "005.5" or "005-006" + try: + chapter = float(chapter_str) + except ValueError: + try: + chapter = float(chapter_str.split('-')[0]) # Take first number for ranges + except ValueError: + chapter = 0 # Fallback for unparseable chapter numbers + + return { + 'manga_name': manga_name, + 'volume': volume, + 'chapter': chapter, + 'chapter_str': chapter_str, + 'title': title, + 'group': group, + 'filename': filename + } + + if os.path.exists(filename): + # For debugging: print the filename that couldn't be parsed + print(f"WARNING: Could not parse filename: {base_filename}") + + return None + +def find_cbz_files(folder_path, recursive=False, extra_verbose=False): + """Find all CBZ files in the given folder.""" + cbz_files = [] + + if recursive: + if extra_verbose: + print(f"Recursively searching for CBZ files in {folder_path}") + + for root, _, files in os.walk(folder_path): + for file in files: + if file.lower().endswith('.cbz'): + cbz_files.append(os.path.join(root, file)) + else: + if extra_verbose: + print(f"Searching for CBZ files in {folder_path} (non-recursive)") + + for file in os.listdir(folder_path): + if file.lower().endswith('.cbz'): + cbz_files.append(os.path.join(folder_path, file)) + + if extra_verbose: + print(f"Found {len(cbz_files)} CBZ files") + + return cbz_files + +def fix_missing_files(chapter_infos, folder_path, extra_verbose=False): + """Attempt to find missing files by searching for similar 
filenames.""" + fixed_chapters = [] + unresolved_chapters = [] + + for chapter_info in chapter_infos: + if os.path.exists(chapter_info['filename']): + fixed_chapters.append(chapter_info) + continue + + # Get the problematic filename + base_filename = os.path.basename(chapter_info['filename']) + if extra_verbose: + print(f"Trying to find replacement for: {base_filename}") + + # Look for similar files in the directory + found_replacement = False + directory = os.path.dirname(chapter_info['filename']) + + try: + for file in os.listdir(directory): + if not file.lower().endswith('.cbz'): + continue + + # Check if volume and chapter match + v_match = re.search(fr"v{chapter_info['volume']:02d}", file) + c_match = re.search(fr"c{chapter_info['chapter_str']}", file.replace('?', "'")) + + if v_match and c_match: + if extra_verbose: + print(f" Found potential replacement: {file}") + + # Create a new chapter info with the correct filename + new_chapter_info = dict(chapter_info) + new_chapter_info['filename'] = os.path.join(directory, file) + + if os.path.exists(new_chapter_info['filename']): + if extra_verbose: + print(f" Replacement file exists, using it instead") + fixed_chapters.append(new_chapter_info) + found_replacement = True + break + except Exception as e: + if extra_verbose: + print(f" Error while searching for replacement: {str(e)}") + + if not found_replacement: + unresolved_chapters.append(chapter_info) + + return fixed_chapters, unresolved_chapters + +def organize_by_volume(cbz_files, extra_verbose=False): + """Group CBZ files by manga name and volume.""" + volumes = defaultdict(lambda: defaultdict(list)) + unparsed_files = [] + + for cbz_file in cbz_files: + info = parse_manga_filename(cbz_file) + if info: + manga_key = info['manga_name'].lower() + volumes[manga_key][info['volume']].append(info) + else: + unparsed_files.append(cbz_file) + + # Sort chapters within each volume + for manga in volumes: + for volume in volumes[manga]: + 
volumes[manga][volume].sort(key=lambda x: x['chapter']) + + if extra_verbose and unparsed_files: + print(f"\nWARNING: Could not parse {len(unparsed_files)} files:") + for file in unparsed_files[:10]: # Show first 10 only to avoid spam + print(f" - {os.path.basename(file)}") + if len(unparsed_files) > 10: + print(f" ... and {len(unparsed_files) - 10} more") + + return volumes + +def create_volume_cbz(manga_name, volume_num, chapter_infos, output_dir=None, force=False, verbose=False, extra_verbose=False): + """Combine multiple chapter CBZ files into a single volume CBZ.""" + # Determine output path + volume_filename = f"{manga_name} - Volume {volume_num:02d}.cbz" + + if output_dir: + os.makedirs(output_dir, exist_ok=True) + output_path = os.path.join(output_dir, volume_filename) + else: + # Use the directory of the first chapter + output_path = os.path.join(os.path.dirname(chapter_infos[0]['filename']), volume_filename) + + if verbose: + print(f"Creating volume: {volume_filename}") + print(f"Output path: {output_path}") + + # Check if volume already exists + if os.path.exists(output_path) and not force: + if verbose: + print(f"Skipping {volume_filename} (already exists)") + return True, "Skipped (already exists)" + + try: + # Verify all chapter files exist before starting + missing_files = [] + for chapter_info in chapter_infos: + if not os.path.exists(chapter_info['filename']): + missing_files.append(chapter_info['filename']) + + if missing_files: + if extra_verbose: + print(f"Found {len(missing_files)} missing files, attempting to fix:") + for missing in missing_files: + print(f" - {os.path.basename(missing)}") + + # Try to fix missing files by finding alternatives + chapter_dir = os.path.dirname(chapter_infos[0]['filename']) + fixed_chapters, unresolved_chapters = fix_missing_files(chapter_infos, chapter_dir, extra_verbose) + + if unresolved_chapters: + if extra_verbose: + print(f"Could not resolve {len(unresolved_chapters)} missing files:") + for chapter in 
unresolved_chapters: + print(f" - {os.path.basename(chapter['filename'])}") + return False, f"Missing {len(unresolved_chapters)} chapter file(s) after resolution attempts" + + # Update chapter_infos with the fixed list + chapter_infos = fixed_chapters + + if extra_verbose: + print("All missing files resolved, proceeding with conversion") + + # Create a temporary directory for extraction + with tempfile.TemporaryDirectory() as temp_dir: + # Extract all chapters in order + for chapter_info in tqdm(chapter_infos, desc=f"Extracting chapters for Volume {volume_num}", disable=not verbose): + chapter_dir = os.path.join(temp_dir, f"chapter_{chapter_info['chapter_str'].zfill(3)}") + os.makedirs(chapter_dir, exist_ok=True) + + if verbose: + print(f"Extracting chapter {chapter_info['chapter_str']}") + + if extra_verbose: + print(f"File: {chapter_info['filename']}") + if not os.path.exists(chapter_info['filename']): + print(f" ERROR: File does not exist!") + continue + + try: + # Extract the chapter + with zipfile.ZipFile(chapter_info['filename'], 'r') as zf: + file_list = sorted(zf.namelist()) + + if extra_verbose: + print(f" Contains {len(file_list)} files:") + + for i, file_name in enumerate(file_list): + if file_name.endswith('/'): # Skip directories + continue + + if extra_verbose and i < 10: # Show first 10 files only + print(f" - {file_name}") + + # Extract with a standardized naming pattern: chapterXXX_pageYYY.ext + base, ext = os.path.splitext(os.path.basename(file_name)) + new_name = f"chapter{chapter_info['chapter_str'].zfill(3)}_{i+1:03d}{ext}" + + # Extract file to temp directory + try: + with zf.open(file_name) as source, open(os.path.join(chapter_dir, new_name), 'wb') as target: + shutil.copyfileobj(source, target) + except Exception as file_error: + if extra_verbose: + print(f" ERROR extracting {file_name}: {str(file_error)}") + + if extra_verbose and len(file_list) > 10: + print(f" ... 
and {len(file_list) - 10} more files") + except Exception as e: + return False, f"Error extracting chapter {chapter_info['chapter_str']}: {str(e)}" + + # Create the volume CBZ + if verbose: + print(f"Creating volume CBZ: {volume_filename}") + + try: + with zipfile.ZipFile(output_path, 'w') as volume_zip: + # Add all files from all chapters in order + chapter_dirs = sorted(os.listdir(temp_dir)) + + for chapter_dir in chapter_dirs: + chapter_path = os.path.join(temp_dir, chapter_dir) + if os.path.isdir(chapter_path): + chapter_files = sorted(os.listdir(chapter_path)) + + if extra_verbose: + print(f"Adding directory: {chapter_dir} ({len(chapter_files)} files)") + + for file in chapter_files: + file_path = os.path.join(chapter_path, file) + arc_name = os.path.join(chapter_dir, file) + + if extra_verbose and chapter_files.index(file) < 5: + print(f" - Adding {arc_name}") + + volume_zip.write(file_path, arc_name) + except Exception as e: + return False, f"Error creating volume ZIP: {str(e)}" + + return True, "Created" + except Exception as e: + return False, str(e) + +def main(): + parser = argparse.ArgumentParser(description='Combine individual CBZ chapters into volume CBZ files') + parser.add_argument('folder', help='Folder containing CBZ chapter files') + parser.add_argument('-r', '--recursive', action='store_true', help='Search for CBZ files recursively') + parser.add_argument('-o', '--output', help='Output folder for volume CBZ files (defaults to same location as chapters)') + parser.add_argument('-f', '--force', action='store_true', help='Force creation even if volume CBZ already exists') + parser.add_argument('-v', '--verbose', action='store_true', help='Show detailed progress') + parser.add_argument('-vv', '--extra-verbose', action='store_true', help='Show extremely detailed debugging information') + parser.add_argument('-m', '--min-chapters', type=int, default=2, + help='Minimum number of chapters required to create a volume (default: 2)') + + args = 
parser.parse_args() + + # If extra-verbose is enabled, automatically enable verbose too + if args.extra_verbose: + args.verbose = True + + if not os.path.isdir(args.folder): + print(f"Error: '{args.folder}' is not a valid directory") + return 1 + + cbz_files = find_cbz_files(args.folder, args.recursive, args.extra_verbose) + + if not cbz_files: + print(f"No CBZ files found in '{args.folder}'") + return 0 + + print(f"Found {len(cbz_files)} CBZ file(s)") + + # Organize files by manga and volume + volumes = organize_by_volume(cbz_files, args.extra_verbose) + + total_manga = len(volumes) + total_volumes = sum(len(volumes[manga]) for manga in volumes) + + print(f"Found {total_manga} manga series with {total_volumes} volume(s) to process") + + # Print detailed manga and volume information in extra verbose mode + if args.extra_verbose: + print("\nDetailed manga and volume breakdown:") + for manga_name in volumes: + try: + first_volume = min(volumes[manga_name].keys()) + manga_display_name = volumes[manga_name][first_volume][0]['manga_name'] + print(f"\n{manga_display_name}:") + for volume_num in sorted(volumes[manga_name].keys()): + chapters = volumes[manga_name][volume_num] + print(f" Volume {volume_num}: {len(chapters)} chapters") + if args.extra_verbose: + for chapter in chapters: + print(f" - Chapter {chapter['chapter_str']}: {os.path.basename(chapter['filename'])}") + print(f" Exists: {os.path.exists(chapter['filename'])}") + except (ValueError, IndexError): + pass + + success_count = 0 + skip_count = 0 + fail_count = 0 + ignored_count = 0 + + # Process each manga and volume + for manga_name in volumes: + # Get a proper display name from the first volume's first chapter + try: + first_volume = min(volumes[manga_name].keys()) + manga_display_name = volumes[manga_name][first_volume][0]['manga_name'] + except (ValueError, IndexError, KeyError): + # Fallback if we can't get proper name + manga_display_name = manga_name + + if args.verbose: + print(f"\nProcessing manga: 
{manga_display_name}") + + for volume_num in sorted(volumes[manga_name].keys()): + chapters = volumes[manga_name][volume_num] + + # Skip volumes with too few chapters + if len(chapters) < args.min_chapters: + if args.verbose: + print(f"Skipping Volume {volume_num} - only has {len(chapters)} chapter(s) (minimum is {args.min_chapters})") + ignored_count += 1 + continue + + # List all chapters for debugging + if args.extra_verbose: + print(f"\nChapters for {manga_display_name} Volume {volume_num}:") + for chapter in chapters: + print(f" - Chapter {chapter['chapter_str']}: {os.path.basename(chapter['filename'])}") + print(f" Exists: {os.path.exists(chapter['filename'])}") + + success, message = create_volume_cbz( + manga_display_name, + volume_num, + chapters, + args.output, + args.force, + args.verbose, + args.extra_verbose + ) + + if success: + if message == "Skipped (already exists)": + skip_count += 1 + else: + success_count += 1 + else: + fail_count += 1 + print(f"Error creating Volume {volume_num} for {manga_display_name}: {message}") + + print(f"\nVolume creation complete:") + print(f" - {success_count} volumes created successfully") + print(f" - {skip_count} volumes skipped (already exist)") + print(f" - {ignored_count} volumes ignored (too few chapters)") + print(f" - {fail_count} volumes failed") + + return 0 + +if __name__ == '__main__': + import sys + sys.exit(main()) \ No newline at end of file diff --git a/cbz_volume_combiner/core.py b/cbz_volume_combiner/core.py new file mode 100644 index 0000000..d765298 --- /dev/null +++ b/cbz_volume_combiner/core.py @@ -0,0 +1,31 @@ +# File: /home/code/projects/manga-organizer-1/cbz-volume-combiner/cbz_volume_combiner/core.py +import os +from collections import defaultdict +from .parsing import parse_manga_filename + +def organize_by_volume(cbz_files, extra_verbose=False): + """Group CBZ files by manga name and volume.""" + volumes = defaultdict(lambda: defaultdict(list)) + unparsed_files = [] + + for cbz_file in 
cbz_files: + info = parse_manga_filename(cbz_file) + if info: + manga_key = info['manga_name'].lower() + volumes[manga_key][info['volume']].append(info) + else: + unparsed_files.append(cbz_file) + + # Sort chapters within each volume + for manga in volumes: + for volume in volumes[manga]: + volumes[manga][volume].sort(key=lambda x: x['chapter']) + + if extra_verbose and unparsed_files: + print(f"\nWARNING: Could not parse {len(unparsed_files)} files:") + for file in unparsed_files[:10]: # Show first 10 only to avoid spam + print(f" - {os.path.basename(file)}") + if len(unparsed_files) > 10: + print(f" ... and {len(unparsed_files) - 10} more") + + return volumes \ No newline at end of file diff --git a/cbz_volume_combiner/file_utils.py b/cbz_volume_combiner/file_utils.py new file mode 100644 index 0000000..a4c7a63 --- /dev/null +++ b/cbz_volume_combiner/file_utils.py @@ -0,0 +1,125 @@ +# File: /home/code/projects/manga-organizer-1/cbz-volume-combiner/cbz_volume_combiner/file_utils.py +import os +import re +from .parsing import parse_manga_filename + +def find_cbz_files(folder_path, recursive=False, extra_verbose=False): + """Find all CBZ files in the given folder.""" + cbz_files = [] + + if recursive: + if extra_verbose: + print(f"Recursively searching for CBZ files in {folder_path}") + + for root, _, files in os.walk(folder_path): + for file in files: + if file.lower().endswith('.cbz'): + cbz_files.append(os.path.join(root, file)) + else: + if extra_verbose: + print(f"Searching for CBZ files in {folder_path} (non-recursive)") + + for file in os.listdir(folder_path): + if file.lower().endswith('.cbz'): + cbz_files.append(os.path.join(folder_path, file)) + + if extra_verbose: + print(f"Found {len(cbz_files)} CBZ files") + + return cbz_files + +def fix_missing_files(chapter_infos, folder_path, extra_verbose=False): + """Attempt to find missing files by searching for similar filenames.""" + fixed_chapters = [] + unresolved_chapters = [] + + for chapter_info in 
chapter_infos: + # Check if file exists first + if os.path.exists(chapter_info['filename']): + fixed_chapters.append(chapter_info) + continue + + # If we're here, the file doesn't exist - get the problematic filename + base_filename = os.path.basename(chapter_info['filename']) + directory = os.path.dirname(chapter_info['filename']) + + if extra_verbose: + print(f"Trying to find replacement for: {base_filename}") + + # First approach: Direct check with normalized path + # This is for cases where Python's path handling might be different from the filesystem + found_replacement = False + + # Look for similar files in the directory + try: + # Get all CBZ files in the directory + cbz_files_in_dir = [f for f in os.listdir(directory) if f.lower().endswith('.cbz')] + + # First try: Look for exact matches with volume and chapter numbers + vol_num = chapter_info['volume'] + chap_num = chapter_info['chapter_str'] + + # Improved pattern matching for volume and chapter numbers + vol_pattern = f"v{vol_num:02d}" # e.g., "v07" + chap_pattern = f"c{chap_num}" # e.g., "c037" + + # Try to find a direct match first + for file in cbz_files_in_dir: + # Check volume and chapter patterns + if vol_pattern in file and chap_pattern in file: + potential_path = os.path.join(directory, file) + if os.path.exists(potential_path): + if extra_verbose: + print(f" Found direct match: {file}") + new_chapter_info = dict(chapter_info) + new_chapter_info['filename'] = potential_path + fixed_chapters.append(new_chapter_info) + found_replacement = True + break + + # If no direct match, try more flexible matching + if not found_replacement: + # Fuzzy match approach + chapter_pattern = rf"v0*{vol_num}\s+c0*{chap_num.lstrip('0')}" + + for file in cbz_files_in_dir: + # Remove special characters for comparison + clean_file = file + for char in "'?,": + clean_file = clean_file.replace(char, '') + + # Strip special characters from the pattern too + clean_pattern = chapter_pattern + for char in "'?,": + 
clean_pattern = clean_pattern.replace(char, '') + + # Try matching with cleaned strings + if re.search(chapter_pattern, file, re.IGNORECASE) or re.search(clean_pattern, clean_file, re.IGNORECASE): + potential_path = os.path.join(directory, file) + if os.path.exists(potential_path): + if extra_verbose: + print(f" Found fuzzy match: {file}") + new_chapter_info = dict(chapter_info) + new_chapter_info['filename'] = potential_path + fixed_chapters.append(new_chapter_info) + found_replacement = True + break + + # Last resort: List all files and let the user see what's available + if not found_replacement and extra_verbose: + print(" No match found. Available files in directory:") + for idx, file in enumerate(sorted(cbz_files_in_dir)): + if idx < 20: # Limit to first 20 files to avoid spam + print(f" - {file}") + else: + print(f" ... and {len(cbz_files_in_dir) - 20} more files") + break + + except Exception as e: + if extra_verbose: + print(f" Error while searching for replacement: {str(e)}") + + if not found_replacement: + unresolved_chapters.append(chapter_info) + + return fixed_chapters, unresolved_chapters \ No newline at end of file diff --git a/cbz_volume_combiner/parsing.py b/cbz_volume_combiner/parsing.py new file mode 100644 index 0000000..6ac37d2 --- /dev/null +++ b/cbz_volume_combiner/parsing.py @@ -0,0 +1,71 @@ +# File: /home/code/projects/manga-organizer-1/cbz-volume-combiner/cbz_volume_combiner/parsing.py +import os +import re + +def normalize_filename(filename): + """Normalize a filename to handle encoding issues and special characters.""" + # Replace common problematic characters + replacements = { + '?': "'", # Replace question marks that might be incorrectly encoded apostrophes + '?': "'", # Another possible encoding of apostrophe + '?': '"', # Possible encoding of double quote + '?': '-', # Possible encoding of dash + '?': ' ' # Possible encoding of space + } + + for bad_char, good_char in replacements.items(): + filename = filename.replace(bad_char, 
good_char) + + return filename + +def parse_manga_filename(filename): + """Extract volume, chapter and title information from a manga filename.""" + # Pattern to match: manga_name v## c### [optional title] [optional group] + base_filename = os.path.basename(filename) + + # Try to normalize the filename to handle encoding issues + normalized_filename = normalize_filename(base_filename) + + # Use a more flexible pattern to handle apostrophes and other special characters + pattern = r'(.*?)\s+v(\d+)\s+c(\d+[.\d-]*)\s+(.*)\.cbz$' + match = re.match(pattern, normalized_filename) + + if match: + manga_name = match.group(1).strip() + volume = int(match.group(2)) + chapter_str = match.group(3) + rest = match.group(4) + + # Try to extract title and group if available + group_match = re.search(r'\[(.*?)\]$', rest) + if group_match: + group = group_match.group(1) + title = rest[:rest.rfind('[')-1].strip() + else: + group = "" + title = rest.strip() + + # Handle chapter numbers like "005.5" or "005-006" + try: + chapter = float(chapter_str) + except ValueError: + try: + chapter = float(chapter_str.split('-')[0]) # Take first number for ranges + except ValueError: + chapter = 0 # Fallback for unparseable chapter numbers + + return { + 'manga_name': manga_name, + 'volume': volume, + 'chapter': chapter, + 'chapter_str': chapter_str, + 'title': title, + 'group': group, + 'filename': filename + } + + if os.path.exists(filename): + # For debugging: print the filename that couldn't be parsed + print(f"WARNING: Could not parse filename: {base_filename}") + + return None \ No newline at end of file diff --git a/cbz_volume_combiner/volume.py b/cbz_volume_combiner/volume.py new file mode 100644 index 0000000..a63fe22 --- /dev/null +++ b/cbz_volume_combiner/volume.py @@ -0,0 +1,159 @@ +# File: /home/code/projects/manga-organizer-1/cbz-volume-combiner/cbz_volume_combiner/volume.py +import os +import zipfile +import tempfile +import shutil +from tqdm import tqdm +from .file_utils import 
def _chapter_label(chapter_str):
    """Zero-pad the leading integer part of a chapter string to three digits.

    Padding only the integer part keeps fractional or ranged chapters such as
    "5.5" or "10-11" sortable next to whole chapters ("005.5", "010-11").
    """
    digit_count = len(chapter_str) - len(chapter_str.lstrip('0123456789'))
    return chapter_str[:digit_count].zfill(3) + chapter_str[digit_count:]


def _resolve_missing_chapters(chapter_infos, extra_verbose):
    """Try to locate missing chapter files; return ``(chapter_infos, error)``.

    ``error`` is ``None`` when processing may continue with the returned list,
    otherwise it is the failure message to report to the caller.
    """
    missing_files = [
        info['filename'] for info in chapter_infos
        if not os.path.exists(info['filename'])
    ]
    if not missing_files:
        return chapter_infos, None

    if extra_verbose:
        print(f"Found {len(missing_files)} missing files, attempting to fix:")
        for missing in missing_files:
            print(f" - {os.path.basename(missing)}")

    # Try to fix missing files by finding alternatives
    chapter_dir = os.path.dirname(chapter_infos[0]['filename'])
    fixed_chapters, unresolved_chapters = fix_missing_files(chapter_infos, chapter_dir, extra_verbose)

    if not unresolved_chapters:
        if extra_verbose:
            print("All missing files resolved, proceeding with conversion")
        return fixed_chapters, None

    failure = f"Missing {len(unresolved_chapters)} chapter file(s) after resolution attempts"

    if extra_verbose:
        print(f"Could not resolve {len(unresolved_chapters)} missing files:")
        for chapter in unresolved_chapters:
            print(f" - {os.path.basename(chapter['filename'])}")

    # Only offer to continue if we have at least 2 chapters
    if len(fixed_chapters) < 2:
        return chapter_infos, failure

    print(f"WARNING: {len(unresolved_chapters)} chapters couldn't be found.")
    print(f"Would you like to continue with the {len(fixed_chapters)} available chapters? (y/n)")
    try:
        response = input().strip().lower()
    except EOFError:
        # No interactive stdin (cron, pipe, ...): treat it as "no".
        response = ''

    if response != 'y':
        return chapter_infos, failure

    print(f"Continuing with {len(fixed_chapters)} available chapters")
    return fixed_chapters, None


def _extract_chapter(chapter_info, chapter_dir, extra_verbose):
    """Extract the pages of one chapter archive into ``chapter_dir``.

    Pages are renamed to ``chapter<label>_<index>.<ext>`` so that they sort in
    archive order. A page that cannot be extracted is reported and skipped;
    errors opening the archive itself propagate to the caller.
    """
    label = _chapter_label(chapter_info['chapter_str'])

    with zipfile.ZipFile(chapter_info['filename'], 'r') as zf:
        file_list = sorted(zf.namelist())

        if extra_verbose:
            print(f" Contains {len(file_list)} files:")

        for i, file_name in enumerate(file_list):
            if file_name.endswith('/'):  # Skip directories
                continue

            if extra_verbose and i < 10:  # Show first 10 files only
                print(f" - {file_name}")

            # Only the extension of the stored name is reused; the target name
            # is generated, so archive entries cannot escape chapter_dir.
            ext = os.path.splitext(os.path.basename(file_name))[1]
            target_path = os.path.join(chapter_dir, f"chapter{label}_{i+1:03d}{ext}")

            try:
                with zf.open(file_name) as source, open(target_path, 'wb') as target:
                    shutil.copyfileobj(source, target)
            except Exception as file_error:
                # Best effort: one bad page must not abort the whole volume,
                # but the user is always told and no truncated page is kept.
                print(f"WARNING: Could not extract {file_name} from "
                      f"{os.path.basename(chapter_info['filename'])}: {str(file_error)}")
                if os.path.exists(target_path):
                    os.remove(target_path)

        if extra_verbose and len(file_list) > 10:
            print(f" ... and {len(file_list) - 10} more files")


def _write_volume(temp_dir, output_path, extra_verbose):
    """Pack every chapter directory under ``temp_dir`` into ``output_path``.

    The archive is built next to the destination and moved into place only
    when complete, so a failed run never leaves a truncated volume behind
    (which later runs would skip as "already exists").
    """
    partial_path = output_path + ".part"
    try:
        with zipfile.ZipFile(partial_path, 'w') as volume_zip:
            # Add all files from all chapters in order
            for chapter_dir in sorted(os.listdir(temp_dir)):
                chapter_path = os.path.join(temp_dir, chapter_dir)
                if not os.path.isdir(chapter_path):
                    continue

                chapter_files = sorted(os.listdir(chapter_path))

                if extra_verbose:
                    print(f"Adding directory: {chapter_dir} ({len(chapter_files)} files)")

                for index, file in enumerate(chapter_files):
                    arc_name = os.path.join(chapter_dir, file)

                    if extra_verbose and index < 5:
                        print(f" - Adding {arc_name}")

                    volume_zip.write(os.path.join(chapter_path, file), arc_name)

        os.replace(partial_path, output_path)
    finally:
        # After a successful replace the partial file is gone; otherwise clean it up.
        if os.path.exists(partial_path):
            try:
                os.remove(partial_path)
            except OSError:
                pass  # Nothing more can be done; the original error still propagates.


def create_volume_cbz(manga_name, volume_num, chapter_infos, output_dir=None, force=False, verbose=False, extra_verbose=False):
    """Combine multiple chapter CBZ files into a single volume CBZ.

    Args:
        manga_name: Series name used in the output file name.
        volume_num: Integer volume number (formatted as two digits).
        chapter_infos: Chapter dicts in the desired order; each needs at least
            the keys ``filename`` and ``chapter_str``.
        output_dir: Destination directory (created if needed). Defaults to the
            directory of the first chapter.
        force: Recreate the volume even if the output file already exists.
        verbose: Print progress information.
        extra_verbose: Print detailed per-file debugging information.

    Returns:
        ``(success, message)``. ``message`` is ``"Created"``,
        ``"Skipped (already exists)"``, or a description of the failure.
    """
    if not chapter_infos:
        return False, "No chapter files provided"

    # Determine output path
    volume_filename = f"{manga_name} - Volume {volume_num:02d}.cbz"

    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
        output_path = os.path.join(output_dir, volume_filename)
    else:
        # Use the directory of the first chapter
        output_path = os.path.join(os.path.dirname(chapter_infos[0]['filename']), volume_filename)

    if verbose:
        print(f"Creating volume: {volume_filename}")
        print(f"Output path: {output_path}")

    # Check if volume already exists
    if os.path.exists(output_path) and not force:
        if verbose:
            print(f"Skipping {volume_filename} (already exists)")
        return True, "Skipped (already exists)"

    try:
        # Verify all chapter files exist before starting and attempt to fix missing files
        chapter_infos, error = _resolve_missing_chapters(chapter_infos, extra_verbose)
        if error:
            return False, error

        # Final verification before proceeding
        for chapter_info in chapter_infos:
            if not os.path.exists(chapter_info['filename']):
                if extra_verbose:
                    print(f"Fatal error: File still missing after resolution: {chapter_info['filename']}")
                return False, f"File still missing after resolution: {os.path.basename(chapter_info['filename'])}"

        # Create a temporary directory for extraction
        with tempfile.TemporaryDirectory() as temp_dir:
            # Extract all chapters in order
            for chapter_info in tqdm(chapter_infos, desc=f"Extracting chapters for Volume {volume_num}", disable=not verbose):
                chapter_dir = os.path.join(temp_dir, f"chapter_{_chapter_label(chapter_info['chapter_str'])}")
                os.makedirs(chapter_dir, exist_ok=True)

                if verbose:
                    print(f"Extracting chapter {chapter_info['chapter_str']}")

                if extra_verbose:
                    print(f"File: {chapter_info['filename']}")

                try:
                    _extract_chapter(chapter_info, chapter_dir, extra_verbose)
                except Exception as e:
                    return False, f"Error extracting chapter {chapter_info['chapter_str']}: {str(e)}"

            # Create the volume CBZ
            if verbose:
                print(f"Creating volume CBZ: {volume_filename}")

            try:
                _write_volume(temp_dir, output_path, extra_verbose)
            except Exception as e:
                return False, f"Error creating volume ZIP: {str(e)}"

        return True, "Created"
    except Exception as e:
        # Top-level boundary: callers expect a (success, message) tuple, not a raise.
        return False, str(e)
# Package metadata for cbz-volume-combiner; `setup` and `find_packages` come
# from the setuptools import at the top of setup.py.
setup(
    name="cbz-volume-combiner",
    version="0.2.0",
    packages=find_packages(),
    # Installs the command-line entry script onto the user's PATH.
    scripts=['bin/cbz-volume-combiner'],
    install_requires=[
        'tqdm',
    ],
    # The package uses f-strings throughout, which need Python 3.6 or newer.
    python_requires=">=3.6",
    author="b3nw",
    author_email="b3nw@duck.com",
    description="A tool to combine individual CBZ chapters into volume CBZ files",
    # Matches the license stated in README.md.
    license="MIT",
    keywords="cbz, manga, comics",
    url="https://github.com/b3nw/cbz-volume-combiner",
    classifiers=[
        "Development Status :: 3 - Alpha",
        "Environment :: Console",
        "Intended Audience :: End Users/Desktop",
        "Programming Language :: Python :: 3",
        "Topic :: Utilities",
    ],
)