Init CBZ Volume Combiner v0.2.0

This commit is contained in:
Ben
2025-03-21 23:04:35 +00:00
commit 9fa11aea72
11 changed files with 1076 additions and 0 deletions

View File

@@ -0,0 +1,125 @@
# File: /home/code/projects/manga-organizer-1/cbz-volume-combiner/cbz_volume_combiner/file_utils.py
import os
import re
from .parsing import parse_manga_filename
def find_cbz_files(folder_path, recursive=False, extra_verbose=False):
    """Find all CBZ files in the given folder.

    The ``.cbz`` extension is matched case-insensitively. Results are
    returned in the order the filesystem reports them (not sorted).

    Args:
        folder_path: Directory to search.
        recursive: When true, descend into sub-directories with ``os.walk``;
            otherwise only the direct children of ``folder_path`` are
            considered.
        extra_verbose: When true, print progress messages to stdout.

    Returns:
        A list of paths, each built by joining the containing directory
        with the file name.

    Raises:
        OSError: In non-recursive mode, if ``folder_path`` cannot be listed.
    """
    cbz_files = []
    if recursive:
        if extra_verbose:
            print(f"Recursively searching for CBZ files in {folder_path}")
        for root, _, files in os.walk(folder_path):
            cbz_files.extend(
                os.path.join(root, name)
                for name in files
                if name.lower().endswith('.cbz')
            )
    else:
        if extra_verbose:
            print(f"Searching for CBZ files in {folder_path} (non-recursive)")
        for name in os.listdir(folder_path):
            if not name.lower().endswith('.cbz'):
                continue
            path = os.path.join(folder_path, name)
            # os.listdir also returns directories; os.walk's `files` never
            # does. Skip directories so both modes agree on what a "file" is.
            if os.path.isdir(path):
                continue
            cbz_files.append(path)
    if extra_verbose:
        print(f"Found {len(cbz_files)} CBZ files")
    return cbz_files
def _chapter_match_patterns(vol_num, chap_num):
    """Build the regexes that recognise a file for one volume/chapter pair.

    Args:
        vol_num: Volume number; must be an integer because it is formatted
            with the ``02d`` format spec.
        chap_num: Chapter number as a string, e.g. ``"037"`` or ``"037.5"``.

    Returns:
        A ``(direct_volume, direct_chapter, fuzzy)`` tuple of compiled
        patterns. The two direct patterns are case-sensitive and must both
        be found in a file name; the fuzzy pattern is case-insensitive and
        tolerates different zero padding.
    """
    # A number token must stop here: without this, chapter 3 would also
    # match "c37" (more digits) or "c003.5" (a decimal sub-chapter).
    end_of_number = r"(?!\d|\.\d)"

    direct_volume = re.compile(re.escape(f"v{vol_num:02d}") + r"(?!\d)")
    direct_chapter = re.compile("c" + re.escape(chap_num) + end_of_number)

    significant = chap_num.lstrip('0')
    if chap_num and not significant:
        # The chapter is all zeros (e.g. "000"). Stripping the zeros would
        # leave "c0*", which matches every chapter, so require a zero.
        chapter_part = "0+"
    else:
        chapter_part = "0*" + re.escape(significant)
    fuzzy = re.compile(
        rf"v0*{re.escape(str(vol_num))}\s+c{chapter_part}{end_of_number}",
        re.IGNORECASE,
    )
    return direct_volume, direct_chapter, fuzzy


def _find_replacement(chapter_info, directory, extra_verbose):
    """Return the path of a CBZ in ``directory`` matching ``chapter_info``.

    Tries a direct match first (zero-padded volume token and the literal
    chapter string), then a fuzzy match that ignores zero padding and the
    characters ``'``, ``?`` and ``,`` in the file name. Returns ``None``
    when nothing matches.
    """
    # Sorted so the chosen replacement does not depend on the arbitrary
    # order in which os.listdir returns entries.
    cbz_files_in_dir = sorted(
        f for f in os.listdir(directory) if f.lower().endswith('.cbz')
    )
    direct_volume, direct_chapter, fuzzy = _chapter_match_patterns(
        chapter_info['volume'], str(chapter_info['chapter_str'])
    )

    for name in cbz_files_in_dir:
        if direct_volume.search(name) and direct_chapter.search(name):
            potential_path = os.path.join(directory, name)
            # Re-check existence: listdir also reports broken symlinks.
            if os.path.exists(potential_path):
                if extra_verbose:
                    print(f" Found direct match: {name}")
                return potential_path

    # Punctuation between the volume and chapter tokens (e.g. "v07, c037")
    # would defeat the whitespace requirement, so also test a cleaned name.
    strip_table = str.maketrans('', '', "'?,")
    for name in cbz_files_in_dir:
        if fuzzy.search(name) or fuzzy.search(name.translate(strip_table)):
            potential_path = os.path.join(directory, name)
            if os.path.exists(potential_path):
                if extra_verbose:
                    print(f" Found fuzzy match: {name}")
                return potential_path

    # Last resort: show what is available so the user can investigate.
    if extra_verbose:
        print(" No match found. Available files in directory:")
        for name in cbz_files_in_dir[:20]:  # cap the listing to avoid spam
            print(f" - {name}")
        if len(cbz_files_in_dir) > 20:
            print(f" ... and {len(cbz_files_in_dir) - 20} more files")
    return None


def fix_missing_files(chapter_infos, folder_path, extra_verbose=False):
    """Attempt to find missing files by searching for similar filenames.

    Each chapter whose ``'filename'`` exists is passed through unchanged.
    For a missing file, the CBZ files in the same directory are searched
    for one whose name carries the same volume and chapter number.

    Args:
        chapter_infos: Iterable of dicts with at least the keys
            ``'filename'`` (path), ``'volume'`` (integer) and
            ``'chapter_str'`` (chapter number as a string).
        folder_path: Directory searched when a missing filename has no
            directory component of its own.
        extra_verbose: When true, print diagnostic messages to stdout.

    Returns:
        A ``(fixed_chapters, unresolved_chapters)`` tuple of lists. Fixed
        entries that needed a replacement are shallow copies with
        ``'filename'`` pointing at the replacement; the input dicts are
        never mutated. Chapters for which the search found nothing, or for
        which the search itself failed, are returned as unresolved.
    """
    fixed_chapters = []
    unresolved_chapters = []
    for chapter_info in chapter_infos:
        filename = chapter_info['filename']
        if os.path.exists(filename):
            fixed_chapters.append(chapter_info)
            continue

        base_filename = os.path.basename(filename)
        # A bare filename has an empty dirname, which os.listdir rejects;
        # fall back to the folder being processed, then the working dir.
        directory = os.path.dirname(filename) or folder_path or os.curdir
        if extra_verbose:
            print(f"Trying to find replacement for: {base_filename}")

        replacement = None
        try:
            replacement = _find_replacement(
                chapter_info, directory, extra_verbose
            )
        except (OSError, KeyError, TypeError, ValueError) as e:
            # Best effort: an unreadable directory or malformed chapter
            # info leaves the chapter unresolved rather than aborting.
            if extra_verbose:
                print(f" Error while searching for replacement: {str(e)}")

        if replacement is None:
            unresolved_chapters.append(chapter_info)
        else:
            new_chapter_info = dict(chapter_info)
            new_chapter_info['filename'] = replacement
            fixed_chapters.append(new_chapter_info)
    return fixed_chapters, unresolved_chapters