# cbz-volume-combiner/cbz_volume_combiner/core.py

# File: /home/code/projects/manga-organizer-1/cbz-volume-combiner/cbz_volume_combiner/core.py
import os
import re
from collections import defaultdict

from .file_utils import has_problematic_characters, find_file_by_volume_chapter
from .parsing import parse_manga_filename

# Lenient fallback patterns used when the regular parser rejects a filename:
# "v##" marks the volume and "c###" (optionally with a decimal part such as
# "c012.5") marks the chapter.
_VOLUME_PATTERN = re.compile(r'v(\d+)')
_CHAPTER_PATTERN = re.compile(r'c(\d+(?:\.\d+)?)')


def _parse_problematic_filename(cbz_file):
    """Build a minimal info dict from the v##/c### markers in a filename.

    Only the base name of ``cbz_file`` is inspected. Returns ``None`` when
    either the volume or the chapter marker is missing.
    """
    base_filename = os.path.basename(cbz_file)
    vol_match = _VOLUME_PATTERN.search(base_filename)
    chap_match = _CHAPTER_PATTERN.search(base_filename)
    if not (vol_match and chap_match):
        return None

    chap_str = chap_match.group(1)
    return {
        # Everything before the first volume marker is taken as the name.
        'manga_name': base_filename[:vol_match.start()].strip(),
        'volume': int(vol_match.group(1)),
        # The chapter pattern only captures digits with an optional decimal
        # part, so the conversion cannot fail here.
        'chapter': float(chap_str),
        'chapter_str': chap_str,
        'title': '',
        'group': '',
        'filename': cbz_file,
    }


def organize_by_volume(cbz_files, extra_verbose=False):
    """Group CBZ files by manga name and volume.

    Each file is first handed to ``parse_manga_filename``. If that yields
    nothing and ``has_problematic_characters`` flags the file, a lenient
    fallback extracts the volume and chapter from ``v##`` / ``c###`` markers
    in the base filename.

    Args:
        cbz_files: Iterable of CBZ file paths.
        extra_verbose: When true, print warnings about problematic and
            unparsed files and trace the fallback parsing.

    Returns:
        A nested ``defaultdict`` mapping lower-cased manga name -> volume ->
        list of info dicts, each list sorted by the dicts' ``'chapter'``
        value.
    """
    # Materialize once: the input is walked more than once below, which
    # would silently drop everything on the second pass for a generator.
    cbz_files = list(cbz_files)

    volumes = defaultdict(lambda: defaultdict(list))
    unparsed_files = []

    # Identify files with problematic characters up front so the check runs
    # once per file and the result can be reused by the fallback below.
    problematic_files = [
        cbz_file for cbz_file in cbz_files
        if has_problematic_characters(cbz_file)[0]
    ]
    problematic_lookup = set(problematic_files)

    if problematic_files and extra_verbose:
        print(f"\nWARNING: Found {len(problematic_files)} problematic filenames that might need special handling.")

    for cbz_file in cbz_files:
        info = parse_manga_filename(cbz_file)

        # The lenient fallback is only attempted for flagged filenames.
        if not info and cbz_file in problematic_lookup:
            if extra_verbose:
                print(f"Attempting alternative parsing for problematic file: {os.path.basename(cbz_file)}")
            info = _parse_problematic_filename(cbz_file)
            if info and extra_verbose:
                print(f"  Extracted: manga={info['manga_name']}, vol={info['volume']}, chap={info['chapter_str']}")

        if not info:
            unparsed_files.append(cbz_file)
            continue

        manga_key = info['manga_name'].lower()
        volumes[manga_key][info['volume']].append(info)

    # Sort chapters within each volume.
    for manga_volumes in volumes.values():
        for chapters in manga_volumes.values():
            chapters.sort(key=lambda entry: entry['chapter'])

    if extra_verbose and unparsed_files:
        print(f"\nWARNING: Could not parse {len(unparsed_files)} files:")
        for file in unparsed_files[:10]:  # Show first 10 only to avoid spam
            print(f"  - {os.path.basename(file)}")
        if len(unparsed_files) > 10:
            print(f"  ... and {len(unparsed_files) - 10} more")

    return volumes