diff --git a/cbz_volume_combiner/__init__.py b/cbz_volume_combiner/__init__.py index b6dd096..48b6de5 100644 --- a/cbz_volume_combiner/__init__.py +++ b/cbz_volume_combiner/__init__.py @@ -4,4 +4,4 @@ from .file_utils import find_cbz_files, fix_missing_files from .core import organize_by_volume from .volume import create_volume_cbz -__version__ = "0.2.0" \ No newline at end of file +__version__ = "0.2.2" \ No newline at end of file diff --git a/cbz_volume_combiner/core.py b/cbz_volume_combiner/core.py index d765298..d4ecb95 100644 --- a/cbz_volume_combiner/core.py +++ b/cbz_volume_combiner/core.py @@ -2,12 +2,23 @@ import os from collections import defaultdict from .parsing import parse_manga_filename +from .file_utils import has_problematic_characters, find_file_by_volume_chapter def organize_by_volume(cbz_files, extra_verbose=False): """Group CBZ files by manga name and volume.""" volumes = defaultdict(lambda: defaultdict(list)) unparsed_files = [] + # First, identify any files with problematic characters + problematic_files = [] + for cbz_file in cbz_files: + has_problem, _ = has_problematic_characters(cbz_file) + if has_problem: + problematic_files.append(cbz_file) + + if problematic_files and extra_verbose: + print(f"\nWARNING: Found {len(problematic_files)} problematic filenames that might need special handling.") + for cbz_file in cbz_files: info = parse_manga_filename(cbz_file) if info: @@ -15,6 +26,51 @@ def organize_by_volume(cbz_files, extra_verbose=False): volumes[manga_key][info['volume']].append(info) else: unparsed_files.append(cbz_file) + + # For unparsed files that have problematic characters, + # try to get volume and chapter from filename pattern directly + has_problem, _ = has_problematic_characters(cbz_file) + if has_problem: + if extra_verbose: + print(f"Attempting alternative parsing for problematic file: {os.path.basename(cbz_file)}") + + # Extract basic info using more lenient pattern + base_filename = os.path.basename(cbz_file) + # Look for v## and c### patterns + vol_match = re.search(r'v(\d+)', base_filename) + chap_match = re.search(r'c(\d+(?:\.\d+)?)', base_filename) + + if vol_match and chap_match: + # Extract manga name (everything before v##) + vol_pos = base_filename.find(f"v{vol_match.group(1)}") + manga_name = base_filename[:vol_pos].strip() + + # Create a basic info dict + vol_num = int(vol_match.group(1)) + chap_str = chap_match.group(1) + + try: + chap_num = float(chap_str) + except ValueError: + chap_num = 0 + + if extra_verbose: + print(f" Extracted: manga={manga_name}, vol={vol_num}, chap={chap_str}") + + info = { + 'manga_name': manga_name, + 'volume': vol_num, + 'chapter': chap_num, + 'chapter_str': chap_str, + 'title': '', + 'group': '', + 'filename': cbz_file + } + + manga_key = manga_name.lower() + volumes[manga_key][vol_num].append(info) + # Remove from unparsed files since we handled it + unparsed_files.remove(cbz_file) # Sort chapters within each volume for manga in volumes: diff --git a/cbz_volume_combiner/file_utils.py b/cbz_volume_combiner/file_utils.py index a4c7a63..4200e9b 100644 --- a/cbz_volume_combiner/file_utils.py +++ b/cbz_volume_combiner/file_utils.py @@ -1,8 +1,23 @@ -# File: /home/code/projects/manga-organizer-1/cbz-volume-combiner/cbz_volume_combiner/file_utils.py import os import re +import unicodedata from .parsing import parse_manga_filename +def is_emoji(char): + """Check if a character is an emoji or other problematic symbol.""" + return ( + unicodedata.category(char) in ('So', 'Sm') or # Symbol categories + ord(char) > 0x1F000 # Emoji range + ) + +def has_problematic_characters(filename): + """Check if filename contains emoji or problematic characters.""" + basename = os.path.basename(filename) + for char in basename: + if is_emoji(char) or char == '?': + return True, char + return False, None + def find_cbz_files(folder_path, recursive=False, extra_verbose=False): """Find all CBZ files in the given folder.""" cbz_files = [] @@ -39,87 +54,118 @@ def fix_missing_files(chapter_infos, folder_path, extra_verbose=False): fixed_chapters.append(chapter_info) continue - # If we're here, the file doesn't exist - get the problematic filename - base_filename = os.path.basename(chapter_info['filename']) + # If file doesn't exist, look for it by volume and chapter directory = os.path.dirname(chapter_info['filename']) + vol_num = chapter_info['volume'] + chap_num = chapter_info['chapter_str'] if extra_verbose: - print(f"Trying to find replacement for: {base_filename}") + print(f"Looking for alternative for {os.path.basename(chapter_info['filename'])}") - # First approach: Direct check with normalized path - # This is for cases where Python's path handling might be different from the filesystem - found_replacement = False + # Find file by volume and chapter numbers directly + actual_file = find_file_by_volume_chapter(directory, vol_num, chap_num, extra_verbose) - # Look for similar files in the directory - try: - # Get all CBZ files in the directory - cbz_files_in_dir = [f for f in os.listdir(directory) if f.lower().endswith('.cbz')] - - # First try: Look for exact matches with volume and chapter numbers - vol_num = chapter_info['volume'] - chap_num = chapter_info['chapter_str'] - - # Improved pattern matching for volume and chapter numbers - vol_pattern = f"v{vol_num:02d}" # e.g., "v07" - chap_pattern = f"c{chap_num}" # e.g., "c037" - - # Try to find a direct match first - for file in cbz_files_in_dir: - # Check volume and chapter patterns - if vol_pattern in file and chap_pattern in file: - potential_path = os.path.join(directory, file) - if os.path.exists(potential_path): - if extra_verbose: - print(f" Found direct match: {file}") - new_chapter_info = dict(chapter_info) - new_chapter_info['filename'] = potential_path - fixed_chapters.append(new_chapter_info) - found_replacement = True - break - - # If no direct match, try more flexible matching - if not found_replacement: - # Fuzzy match approach - chapter_pattern = rf"v0*{vol_num}\s+c0*{chap_num.lstrip('0')}" - - for file in cbz_files_in_dir: - # Remove special characters for comparison - clean_file = file - for char in "'?,": - clean_file = clean_file.replace(char, '') - - # Strip special characters from the pattern too - clean_pattern = chapter_pattern - for char in "'?,": - clean_pattern = clean_pattern.replace(char, '') - - # Try matching with cleaned strings - if re.search(chapter_pattern, file, re.IGNORECASE) or re.search(clean_pattern, clean_file, re.IGNORECASE): - potential_path = os.path.join(directory, file) - if os.path.exists(potential_path): - if extra_verbose: - print(f" Found fuzzy match: {file}") - new_chapter_info = dict(chapter_info) - new_chapter_info['filename'] = potential_path - fixed_chapters.append(new_chapter_info) - found_replacement = True - break - - # Last resort: List all files and let the user see what's available - if not found_replacement and extra_verbose: - print(" No match found. Available files in directory:") - for idx, file in enumerate(sorted(cbz_files_in_dir)): - if idx < 20: # Limit to first 20 files to avoid spam - print(f" - {file}") - else: - print(f" ... and {len(cbz_files_in_dir) - 20} more files") - break - - except Exception as e: + if actual_file and os.path.exists(actual_file): if extra_verbose: - print(f" Error while searching for replacement: {str(e)}") + print(f"Found alternative file: {os.path.basename(actual_file)}") - if not found_replacement: + new_chapter_info = dict(chapter_info) + new_chapter_info['filename'] = actual_file + fixed_chapters.append(new_chapter_info) + else: + if extra_verbose: + print(f"No alternative found for volume {vol_num}, chapter {chap_num}") unresolved_chapters.append(chapter_info) - return fixed_chapters, unresolved_chapters \ No newline at end of file + return fixed_chapters, unresolved_chapters + +def find_file_by_volume_chapter(directory, volume_num, chapter_num, extra_verbose=False): + """Find a file in a directory by its volume and chapter number only.""" + if not os.path.exists(directory): + if extra_verbose: + print(f"Directory does not exist: {directory}") + return None + + if extra_verbose: + print(f"Looking for volume {volume_num}, chapter {chapter_num} in {directory}") + + try: + files = [f for f in os.listdir(directory) if f.lower().endswith('.cbz')] + + # We'll only focus on the essential part: finding files by volume and chapter numbers + # regardless of special characters or encoding issues + + # First: Look for exact pattern matches + for file in files: + # Look for both zero-padded and non-zero-padded versions + v_patterns = [f"v{volume_num:02d}", f"v{volume_num}"] + c_patterns = [f"c{chapter_num}", f"c{chapter_num.zfill(3)}" if chapter_num.isdigit() else f"c{chapter_num}"] + + for v_pattern in v_patterns: + for c_pattern in c_patterns: + if v_pattern in file and c_pattern in file: + if extra_verbose: + print(f"Found exact match: {file}") + return os.path.join(directory, file) + + # Second: Use regex pattern matching + for file in files: + v_match = re.search(fr'v0*{volume_num}[^0-9]', file.lower()) + c_match = re.search(fr'c0*{chapter_num}[^0-9]', file.lower()) + + if v_match and c_match: + if extra_verbose: + print(f"Found regex match: {file}") + return os.path.join(directory, file) + + # Third: Last resort - simplified alphanumeric comparison + for file in files: + clean_file = ''.join(c.lower() for c in file if c.isalnum()) + if f"v{volume_num}" in clean_file and f"c{chapter_num}" in clean_file: + if extra_verbose: + print(f"Found simplified match: {file}") + return os.path.join(directory, file) + + if extra_verbose: + print(f"No match found for volume {volume_num}, chapter {chapter_num}") + print("Available files:") + for file in sorted(files)[:10]: + print(f" - {file}") + if len(files) > 10: + print(f" ... and {len(files) - 10} more") + + return None + except Exception as e: + if extra_verbose: + print(f"Error searching for file: {e}") + return None + +def create_clean_filename_mapping(cbz_files, extra_verbose=False): + """Create a mapping of problematic filenames to clean alternatives.""" + filename_mapping = {} + + for file_path in cbz_files: + has_problem, _ = has_problematic_characters(file_path) + + if has_problem: + # Get the directory and filename + directory = os.path.dirname(file_path) + filename = os.path.basename(file_path) + + # Create a clean version of the filename by removing problematic characters + clean_filename = ''.join(c if not is_emoji(c) and c != '?' else '_' for c in filename) + + # Make sure we don't create duplicates + base, ext = os.path.splitext(clean_filename) + counter = 1 + while os.path.exists(os.path.join(directory, clean_filename)): + clean_filename = f"{base}_{counter}{ext}" + counter += 1 + + # Add to mapping + filename_mapping[file_path] = os.path.join(directory, clean_filename) + + if extra_verbose: + print(f"Mapped: {filename} -> {clean_filename}") + + return filename_mapping \ No newline at end of file diff --git a/cbz_volume_combiner/volume.py b/cbz_volume_combiner/volume.py index a63fe22..fab05c9 100644 --- a/cbz_volume_combiner/volume.py +++ b/cbz_volume_combiner/volume.py @@ -4,7 +4,7 @@ import zipfile import tempfile import shutil from tqdm import tqdm -from .file_utils import fix_missing_files +from .file_utils import fix_missing_files, find_file_by_volume_chapter, has_problematic_characters def create_volume_cbz(manga_name, volume_num, chapter_infos, output_dir=None, force=False, verbose=False, extra_verbose=False): """Combine multiple chapter CBZ files into a single volume CBZ.""" @@ -88,11 +88,36 @@ def create_volume_cbz(manga_name, volume_num, chapter_infos, output_dir=None, fo if verbose: print(f"Extracting chapter {chapter_info['chapter_str']}") + # IMPORTANT: Handle file matching right before extraction + file_exists = os.path.exists(chapter_info['filename']) + + if not file_exists: + # Try to find the file by volume and chapter + directory = os.path.dirname(chapter_info['filename']) + vol_num = chapter_info['volume'] + chap_num = chapter_info['chapter_str'] + + if extra_verbose: + print(f"File not found: {chapter_info['filename']}") + print(f"Looking for alternative file with volume {vol_num}, chapter {chap_num}") + + # Find by volume and chapter numbers only + actual_file = find_file_by_volume_chapter(directory, vol_num, chap_num, extra_verbose) + + if actual_file and os.path.exists(actual_file): + chapter_info['filename'] = actual_file + file_exists = True + if extra_verbose: + print(f"Found alternative file: {actual_file}") + + # Skip if file still doesn't exist + if not file_exists: + if extra_verbose: + print(f"Skipping chapter {chapter_info['chapter_str']} - file not found") + continue + if extra_verbose: print(f"File: {chapter_info['filename']}") - if not os.path.exists(chapter_info['filename']): - print(f" ERROR: File does not exist!") - continue try: # Extract the chapter @@ -100,16 +125,13 @@ def create_volume_cbz(manga_name, volume_num, chapter_infos, output_dir=None, fo file_list = sorted(zf.namelist()) if extra_verbose: - print(f" Contains {len(file_list)} files:") + print(f" Contains {len(file_list)} files") for i, file_name in enumerate(file_list): if file_name.endswith('/'): # Skip directories continue - if extra_verbose and i < 10: # Show first 10 files only - print(f" - {file_name}") - - # Extract with a standardized naming pattern: chapterXXX_pageYYY.ext + # Extract with a standardized naming pattern base, ext = os.path.splitext(os.path.basename(file_name)) new_name = f"chapter{chapter_info['chapter_str'].zfill(3)}_{i+1:03d}{ext}" @@ -120,11 +142,10 @@ def create_volume_cbz(manga_name, volume_num, chapter_infos, output_dir=None, fo except Exception as file_error: if extra_verbose: print(f" ERROR extracting {file_name}: {str(file_error)}") - - if extra_verbose and len(file_list) > 10: - print(f" ... and {len(file_list) - 10} more files") except Exception as e: - return False, f"Error extracting chapter {chapter_info['chapter_str']}: {str(e)}" + if extra_verbose: + print(f"Error extracting chapter: {e}") + continue # Skip this chapter but continue with others # Create the volume CBZ if verbose: diff --git a/setup.py b/setup.py index 4ebae8c..71822c7 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup, find_packages setup( name="cbz-volume-combiner", - version="0.2.0", + version="0.2.2", packages=find_packages(), scripts=['bin/cbz-volume-combiner'], install_requires=[