171 lines
6.7 KiB
Python
171 lines
6.7 KiB
Python
import os
|
|
import re
|
|
import unicodedata
|
|
from .parsing import parse_manga_filename
|
|
|
|
def is_emoji(char):
    """Report whether a single character counts as an emoji-like symbol.

    A character qualifies when its Unicode general category is ``So``
    (other symbol) or ``Sm`` (math symbol), or when its code point lies
    above U+1F000. Note that ``Sm`` covers ASCII characters such as ``+``
    and ``=``, so those are reported as well.

    Args:
        char: A string of exactly one character.

    Returns:
        True if the character matches either rule, otherwise False.
    """
    # Category test comes first, matching the evaluation order callers see
    # when an invalid (multi-character) argument is passed.
    if unicodedata.category(char) in ('So', 'Sm'):
        return True
    return ord(char) > 0x1F000
|
|
|
|
def has_problematic_characters(filename):
    """Look for the first problematic character in a path's final component.

    Only the basename is inspected; directory parts of ``filename`` are
    ignored. A character is problematic when ``is_emoji`` accepts it or when
    it is a literal ``?``.

    Args:
        filename: File name or path to inspect.

    Returns:
        A ``(found, char)`` tuple: ``(True, <first offending character>)``
        or ``(False, None)`` when the basename is clean.
    """
    name = os.path.basename(filename)
    offender = next((ch for ch in name if is_emoji(ch) or ch == '?'), None)
    if offender is None:
        return False, None
    return True, offender
|
|
|
|
def find_cbz_files(folder_path, recursive=False, extra_verbose=False):
    """Find all CBZ files in the given folder.

    The ``.cbz`` extension is matched case-insensitively. Directories whose
    name happens to end in ``.cbz`` are never returned, in either mode.

    Args:
        folder_path: Directory to search.
        recursive: When True, descend into sub-directories with ``os.walk``;
            otherwise only the direct entries of ``folder_path`` are listed.
        extra_verbose: When True, print progress messages.

    Returns:
        A list of paths, each joined onto the directory it was found in.

    Raises:
        OSError: In non-recursive mode, if ``folder_path`` cannot be listed.
    """
    cbz_files = []

    if recursive:
        if extra_verbose:
            print(f"Recursively searching for CBZ files in {folder_path}")

        # os.walk already separates directories from files, so ``files``
        # never contains a directory.
        for root, _, files in os.walk(folder_path):
            cbz_files.extend(
                os.path.join(root, name)
                for name in files
                if name.lower().endswith('.cbz')
            )
    else:
        if extra_verbose:
            print(f"Searching for CBZ files in {folder_path} (non-recursive)")

        for name in os.listdir(folder_path):
            if not name.lower().endswith('.cbz'):
                continue
            path = os.path.join(folder_path, name)
            # os.listdir returns directories too; skip them so this branch
            # agrees with the recursive one.
            if os.path.isdir(path):
                continue
            cbz_files.append(path)

    if extra_verbose:
        print(f"Found {len(cbz_files)} CBZ files")

    return cbz_files
|
|
|
|
def fix_missing_files(chapter_infos, folder_path, extra_verbose=False):
    """Re-locate chapter files whose recorded path no longer exists.

    Entries whose ``'filename'`` exists are passed through unchanged. For
    the others, the directory of the recorded path is searched with
    ``find_file_by_volume_chapter`` using the entry's ``'volume'`` and
    ``'chapter_str'`` values; on success a copy of the entry pointing at
    the file that was found is emitted.

    Args:
        chapter_infos: Iterable of mappings with ``'filename'``,
            ``'volume'`` and ``'chapter_str'`` keys.
        folder_path: Accepted for the caller's convenience; this function
            does not read it.
        extra_verbose: When True, print progress messages.

    Returns:
        A ``(fixed_chapters, unresolved_chapters)`` tuple of lists.
    """
    resolved = []
    missing = []

    for info in chapter_infos:
        recorded_path = info['filename']

        # Nothing to repair when the recorded file is still there.
        if os.path.exists(recorded_path):
            resolved.append(info)
            continue

        search_dir = os.path.dirname(recorded_path)
        vol_num = info['volume']
        chap_num = info['chapter_str']

        if extra_verbose:
            print(f"Looking for alternative for {os.path.basename(recorded_path)}")

        replacement = find_file_by_volume_chapter(search_dir, vol_num, chap_num, extra_verbose)

        if not (replacement and os.path.exists(replacement)):
            if extra_verbose:
                print(f"No alternative found for volume {vol_num}, chapter {chap_num}")
            missing.append(info)
            continue

        if extra_verbose:
            print(f"Found alternative file: {os.path.basename(replacement)}")

        # Copy so the caller's original entry is left untouched.
        resolved.append({**info, 'filename': replacement})

    return resolved, missing
|
|
|
|
def find_file_by_volume_chapter(directory, volume_num, chapter_num, extra_verbose=False):
    """Find a CBZ file in a directory by its volume and chapter number only.

    Candidates are the ``.cbz`` entries of ``directory`` (extension matched
    case-insensitively), visited in sorted order so the result is
    deterministic. Three progressively looser tiers are tried; the first
    file that satisfies a tier wins:

    1. exact: lowercase ``v``/``c`` tokens with the usual paddings
       (``v01``/``v1`` and ``c<chapter>``/``c<chapter zero-padded to 3>``).
    2. regex: any amount of zero padding, case-insensitive.
    3. simplified: as tier 2, but on the name reduced to its alphanumeric
       characters, so punctuation between token and number is ignored.

    In every tier a number must not be followed by another digit, so
    volume 1 / chapter 1 no longer matches ``v10`` / ``c100``. In tiers 1
    and 2 a token must not be preceded by a letter (``c1`` does not match
    inside ``Arc1``) and a chapter must not be followed by a decimal part
    (chapter ``10`` does not match ``c010.5``).

    Args:
        directory: Directory to search.
        volume_num: Volume number; must be an integer.
        chapter_num: Chapter number as a string, e.g. ``"7"`` or ``"10.5"``.
        extra_verbose: When True, print diagnostic messages.

    Returns:
        The path of the matching file joined onto ``directory``, or None
        when the directory is missing, nothing matches, or an error occurs.
    """
    if not os.path.exists(directory):
        if extra_verbose:
            print(f"Directory does not exist: {directory}")
        return None

    if extra_verbose:
        print(f"Looking for volume {volume_num}, chapter {chapter_num} in {directory}")

    try:
        files = sorted(f for f in os.listdir(directory) if f.lower().endswith('.cbz'))

        # Escape the chapter so that "10.5" matches a literal dot only.
        chapter = re.escape(chapter_num)
        chapter_forms = [chapter]
        if chapter_num.isdigit():
            chapter_forms.append(re.escape(chapter_num.zfill(3)))
        chapter_alternatives = '|'.join(chapter_forms)

        not_in_word = r'(?<![A-Za-z])'        # token must not continue a word
        digit_end = r'(?![0-9])'              # number must not run on
        chapter_end = r'(?![0-9]|\.[0-9])'    # ...nor carry a decimal part

        def squash(name):
            """Reduce a file name to its alphanumeric characters."""
            return ''.join(ch for ch in name if ch.isalnum())

        # Patterns are built once, not once per file.
        tiers = (
            (
                "exact",
                re.compile(rf"{not_in_word}v(?:{volume_num:02d}|{volume_num:d}){digit_end}"),
                re.compile(rf"{not_in_word}c(?:{chapter_alternatives}){chapter_end}"),
                None,
            ),
            (
                "regex",
                re.compile(rf"{not_in_word}v0*{volume_num:d}{digit_end}", re.IGNORECASE),
                re.compile(rf"{not_in_word}c0*{chapter}{chapter_end}", re.IGNORECASE),
                None,
            ),
            (
                "simplified",
                re.compile(rf"v0*{volume_num:d}{digit_end}", re.IGNORECASE),
                re.compile(rf"c0*{chapter}{digit_end}", re.IGNORECASE),
                squash,
            ),
        )

        for label, volume_re, chapter_re, normalize in tiers:
            for file in files:
                candidate = normalize(file) if normalize else file
                if volume_re.search(candidate) and chapter_re.search(candidate):
                    if extra_verbose:
                        print(f"Found {label} match: {file}")
                    return os.path.join(directory, file)

        if extra_verbose:
            print(f"No match found for volume {volume_num}, chapter {chapter_num}")
            print("Available files:")
            for file in files[:10]:
                print(f" - {file}")
            if len(files) > 10:
                print(f" ... and {len(files) - 10} more")

        return None
    except Exception as e:
        # Best-effort lookup: any failure (unreadable directory, non-integer
        # volume, non-string chapter, ...) is treated as "not found".
        if extra_verbose:
            print(f"Error searching for file: {e}")
        return None
|
|
|
|
def create_clean_filename_mapping(cbz_files, extra_verbose=False):
    """Create a mapping of problematic file paths to clean alternatives.

    A path is problematic when ``has_problematic_characters`` reports a hit
    for it. Its clean name is the basename with every offending character
    (anything ``is_emoji`` accepts, or ``?``) replaced by ``_``. If that name
    is already taken, ``_1``, ``_2``, ... is appended before the extension
    until a free name is found. A name is taken when it exists on disk or
    was already assigned to another entry during this call, so two sources
    never map to the same target.

    Args:
        cbz_files: Iterable of file paths to examine.
        extra_verbose: When True, print each mapping that is created.

    Returns:
        Dict mapping each problematic path to its clean path in the same
        directory. Paths without problematic characters are omitted.
    """
    filename_mapping = {}
    reserved_targets = set()

    for file_path in cbz_files:
        # A path listed twice keeps the target it was given the first time.
        if file_path in filename_mapping:
            continue

        has_problem, _ = has_problematic_characters(file_path)
        if not has_problem:
            continue

        directory, filename = os.path.split(file_path)

        clean_filename = ''.join(
            '_' if is_emoji(c) or c == '?' else c for c in filename
        )

        # Avoid clashing with files on disk and with targets handed out
        # earlier in this call (which do not exist on disk yet).
        base, ext = os.path.splitext(clean_filename)
        target = os.path.join(directory, clean_filename)
        counter = 1
        while os.path.exists(target) or target in reserved_targets:
            clean_filename = f"{base}_{counter}{ext}"
            target = os.path.join(directory, clean_filename)
            counter += 1

        reserved_targets.add(target)
        filename_mapping[file_path] = target

        if extra_verbose:
            print(f"Mapped: {filename} -> {clean_filename}")

    return filename_mapping