import os
import re
import unicodedata

from .parsing import parse_manga_filename

# Lower-case extension that identifies a comic archive.
_CBZ_EXTENSION = '.cbz'


def is_emoji(char):
    """Check if a character is an emoji or other problematic symbol.

    Args:
        char: A single-character string.

    Returns:
        True when the character's Unicode general category is ``So`` or
        ``Sm``, or when its code point is greater than U+1F000.
    """
    # NOTE(review): ``Sm`` also contains ASCII '+', '<', '=', '>', '|' and
    # '~', and the code point test accepts every character above U+1F000,
    # not only emoji. Left unchanged so existing results stay the same;
    # confirm this breadth is intended.
    return unicodedata.category(char) in ('So', 'Sm') or ord(char) > 0x1F000


def has_problematic_characters(filename):
    """Check if filename contains emoji or problematic characters.

    Only the basename is inspected; directory components are ignored.

    Args:
        filename: Path or bare file name.

    Returns:
        ``(True, char)`` with the first offending character, or
        ``(False, None)`` when the basename is clean.
    """
    for char in os.path.basename(filename):
        if char == '?' or is_emoji(char):
            return True, char
    return False, None


def _is_cbz(name):
    """Return True when *name* ends with ``.cbz`` (case-insensitive)."""
    return name.lower().endswith(_CBZ_EXTENSION)


def find_cbz_files(folder_path, recursive=False, extra_verbose=False):
    """Find all CBZ files in the given folder.

    Args:
        folder_path: Directory to scan.
        recursive: Walk sub-directories with ``os.walk`` when True,
            otherwise list only ``folder_path`` itself.
        extra_verbose: Print progress messages when True.

    Returns:
        List of paths (``folder_path``/walk root joined with the entry name)
        whose names end with ``.cbz``.

    Raises:
        OSError: From ``os.listdir`` in the non-recursive case, e.g. when
            ``folder_path`` does not exist.
    """
    cbz_files = []

    if recursive:
        if extra_verbose:
            print(f"Recursively searching for CBZ files in {folder_path}")
        for root, _, files in os.walk(folder_path):
            cbz_files.extend(
                os.path.join(root, name) for name in files if _is_cbz(name)
            )
    else:
        if extra_verbose:
            print(f"Searching for CBZ files in {folder_path} (non-recursive)")
        cbz_files.extend(
            os.path.join(folder_path, name)
            for name in os.listdir(folder_path)
            if _is_cbz(name)
        )

    if extra_verbose:
        print(f"Found {len(cbz_files)} CBZ files")

    return cbz_files


def fix_missing_files(chapter_infos, folder_path, extra_verbose=False):
    """Attempt to find missing files by searching for similar filenames.

    Entries whose ``'filename'`` exists are passed through untouched. For the
    others, a replacement is looked up by volume and chapter number in the
    directory of the recorded filename.

    Args:
        chapter_infos: Iterable of dicts providing the keys ``'filename'``,
            ``'volume'`` and ``'chapter_str'``.
        folder_path: Directory searched when the recorded filename carries no
            directory component.
        extra_verbose: Print progress messages when True.

    Returns:
        ``(fixed_chapters, unresolved_chapters)``. Repaired entries are
        shallow copies with ``'filename'`` replaced; the input dicts are not
        modified.
    """
    fixed_chapters = []
    unresolved_chapters = []

    for chapter_info in chapter_infos:
        filename = chapter_info['filename']

        # Nothing to repair when the recorded file is present.
        if os.path.exists(filename):
            fixed_chapters.append(chapter_info)
            continue

        # os.path.dirname() returns '' for a bare file name; without a
        # fallback the search below could never succeed for such entries.
        directory = os.path.dirname(filename) or folder_path or os.curdir
        vol_num = chapter_info['volume']
        chap_num = chapter_info['chapter_str']

        if extra_verbose:
            print(f"Looking for alternative for {os.path.basename(filename)}")

        actual_file = find_file_by_volume_chapter(
            directory, vol_num, chap_num, extra_verbose
        )

        if actual_file and os.path.exists(actual_file):
            if extra_verbose:
                print(f"Found alternative file: {os.path.basename(actual_file)}")
            new_chapter_info = dict(chapter_info)
            new_chapter_info['filename'] = actual_file
            fixed_chapters.append(new_chapter_info)
        else:
            if extra_verbose:
                print(f"No alternative found for volume {vol_num}, chapter {chap_num}")
            unresolved_chapters.append(chapter_info)

    return fixed_chapters, unresolved_chapters


def _normalize_number(value):
    """Return *value* as a lower-case token without leading zeros, or None."""
    if value is None:
        return None
    if isinstance(value, float) and value.is_integer():
        value = int(value)

    token = str(value).strip().lower()
    if not token:
        return None

    stripped = token.lstrip('0')
    # Keep one zero for "0", "000" and for decimals such as "0.5".
    if not stripped or stripped.startswith('.'):
        stripped = '0' + stripped
    return stripped


def _compile_marker_patterns(prefix, token):
    """Compile the strict and relaxed regexes for a ``prefix`` + number marker."""
    number = '0*' + re.escape(token)
    # The number must not continue with more digits ("c1" vs "c10"); a whole
    # number must not continue as a decimal either ("c10" vs "c10.5").
    tail = r'(?![0-9])' if '.' in token else r'(?![0-9]|\.[0-9])'

    # Strict: the prefix letter may not be the tail of a word ("epic5").
    strict = re.compile(r'(?<![a-z])' + prefix + number + tail)
    # Relaxed: no left boundary, and punctuation/garbage is tolerated
    # between the prefix letter and the number ("v.01", "c_5").
    relaxed = re.compile(prefix + r'[^a-z0-9]*' + number + tail)
    return strict, relaxed


def find_file_by_volume_chapter(directory, volume_num, chapter_num, extra_verbose=False):
    """Find a file in a directory by its volume and chapter number only.

    File names are compared case-insensitively and without their extension.
    Zero padding is ignored on both sides, so volume ``1`` matches ``v1``,
    ``v01`` and ``V001`` but never ``v10``; chapter ``"10"`` does not match
    ``c10.5``. Candidates are examined in sorted order so the result does not
    depend on directory listing order.

    Args:
        directory: Directory to search (not recursive).
        volume_num: Volume number as ``int`` or numeric string.
        chapter_num: Chapter number as string or number, e.g. ``"12"`` or
            ``"10.5"``.
        extra_verbose: Print progress messages when True.

    Returns:
        ``os.path.join(directory, name)`` of the first matching ``.cbz``
        entry, or None when the directory is missing or unreadable, a number
        is missing, or nothing matches.
    """
    if not os.path.isdir(directory):
        if extra_verbose:
            print(f"Directory does not exist: {directory}")
        return None

    if extra_verbose:
        print(f"Looking for volume {volume_num}, chapter {chapter_num} in {directory}")

    volume_token = _normalize_number(volume_num)
    chapter_token = _normalize_number(chapter_num)
    if volume_token is None or chapter_token is None:
        if extra_verbose:
            print("Error searching for file: missing volume or chapter number")
        return None

    # Only the directory listing can fail here, so only that is guarded.
    try:
        files = sorted(f for f in os.listdir(directory) if _is_cbz(f))
    except OSError as e:
        if extra_verbose:
            print(f"Error searching for file: {e}")
        return None

    strict_volume, relaxed_volume = _compile_marker_patterns('v', volume_token)
    strict_chapter, relaxed_chapter = _compile_marker_patterns('c', chapter_token)

    # Match against the lower-cased stem so the ".cbz" suffix cannot take
    # part in a match.
    candidates = [(name, os.path.splitext(name)[0].lower()) for name in files]

    # First: markers with proper boundaries on both sides.
    for name, stem in candidates:
        if strict_volume.search(stem) and strict_chapter.search(stem):
            if extra_verbose:
                print(f"Found exact match: {name}")
            return os.path.join(directory, name)

    # Last resort: tolerate glued-on prefixes and stray separators.
    for name, stem in candidates:
        if relaxed_volume.search(stem) and relaxed_chapter.search(stem):
            if extra_verbose:
                print(f"Found simplified match: {name}")
            return os.path.join(directory, name)

    if extra_verbose:
        print(f"No match found for volume {volume_num}, chapter {chapter_num}")
        print("Available files:")
        for name in files[:10]:
            print(f"  - {name}")
        if len(files) > 10:
            print(f"  ... and {len(files) - 10} more")

    return None


def create_clean_filename_mapping(cbz_files, extra_verbose=False):
    """Create a mapping of problematic filenames to clean alternatives.

    Every character flagged by ``is_emoji`` and every ``'?'`` in the basename
    is replaced by ``'_'``. When the cleaned path already exists on disk, or
    was already handed out to another entry of this mapping, ``_1``, ``_2``,
    ... is appended before the extension until the path is free.

    Args:
        cbz_files: Iterable of file paths.
        extra_verbose: Print each mapping when True.

    Returns:
        Dict mapping each problematic path to its cleaned path in the same
        directory. Paths without problematic characters are omitted. No file
        is renamed by this function.
    """
    filename_mapping = {}
    # Targets already promised to an earlier entry; they do not exist on disk
    # yet, so os.path.exists() alone would hand them out twice.
    reserved_targets = set()

    for file_path in cbz_files:
        if file_path in filename_mapping:
            continue

        has_problem, _ = has_problematic_characters(file_path)
        if not has_problem:
            continue

        directory, filename = os.path.split(file_path)

        clean_filename = ''.join(
            '_' if (c == '?' or is_emoji(c)) else c for c in filename
        )

        base, ext = os.path.splitext(clean_filename)
        target = os.path.join(directory, clean_filename)
        counter = 1
        while os.path.exists(target) or target in reserved_targets:
            clean_filename = f"{base}_{counter}{ext}"
            target = os.path.join(directory, clean_filename)
            counter += 1

        reserved_targets.add(target)
        filename_mapping[file_path] = target

        if extra_verbose:
            print(f"Mapped: {filename} -> {clean_filename}")

    return filename_mapping