189 lines
6.8 KiB
Python
189 lines
6.8 KiB
Python
import os
|
|
import re
|
|
|
|
# Translation table used by normalize_filename().  Keys are written as Unicode
# escapes so they survive re-encoding of this source file: the previous dict
# literal repeated the key '?' five times, so only its last entry ('?' -> ' ')
# ever took effect.
_FILENAME_CHAR_FIXES = str.maketrans({
    '\u2019': "'",   # right single quotation mark (typographic apostrophe)
    '\u2018': "'",   # left single quotation mark
    '\u201c': '"',   # left double quotation mark
    '\u201d': '"',   # right double quotation mark
    '\u2013': '-',   # en dash
    '\u2014': '-',   # em dash
    '\u00a0': ' ',   # no-break space
    '?': ' ',        # literal question mark; the one mapping the old table applied
})


def normalize_filename(filename):
    """Normalize a filename to handle encoding issues and special characters.

    Typographic quotes, dashes and no-break spaces are replaced by their plain
    ASCII counterparts, and a literal ``?`` is replaced by a space.  All other
    characters are left untouched.

    Args:
        filename: The filename (or any string) to clean up.

    Returns:
        A new string with the problematic characters replaced.
    """
    # A single translate() pass replaces every mapped character at once.
    return filename.translate(_FILENAME_CHAR_FIXES)
|
|
|
|
# "Manga Name v01 c019.1 Optional Title [Group].cbz".  The chapter token may be
# a decimal ("019.1") or a range ("005-006"); a range end is only accepted when
# it is followed by whitespace, "[" or the extension, so "c005-Title" is still
# read as chapter "005".  Title and group are optional, so this pattern also
# covers the bare "Manga Name v04 c021.1.cbz" form.
_SPACED_PATTERN = re.compile(
    r'(.*?)\s+v(\d+)\s+'
    r'c(\d+(?:\.\d+)?(?:-\d+(?:\.\d+)?(?=\s|\[|\.cbz$))?)'
    r'\s*(.*)\.cbz$'
)

# Last "[...]" block at the very end of the text that follows the chapter.
_TRAILING_GROUP_PATTERN = re.compile(r'\[([^\[\]]*)\]$')

# "Vol. XX Ch. YYY - Title.cbz" (manga name comes from the parent directory).
_VOL_CH_PATTERN = re.compile(
    r'Vol\.\s*(\d+)\s+Ch\.\s*(\d+(?:\.\d+)?)\s*(?:-\s*(.*))?\.cbz$',
    re.IGNORECASE,
)

# "underscore_format_vXXcYY_(ScanGroup)[tag].cbz"
_UNDERSCORE_PATTERN = re.compile(
    r'(.+?)_v(\d+)c(\d+[a-z]?(?:\.\d+)?)(?:_\((.+?)\))?(?:\[(.+?)\])?\.cbz$'
)

# "Manga Name Vol XX, YYYcTitle.cbz"
_COMMA_VOL_PATTERN = re.compile(
    r'^(.*?)\s+Vol\s+(\d+),\s+(\d+(?:\.\d+)?)[cC](.*?)\.cbz$',
    re.IGNORECASE,
)

# Leading number of a chapter token ("005-006" -> "005", "50b" -> "50").
_LEADING_NUMBER_PATTERN = re.compile(r'\d+(?:\.\d+)?')


def _chapter_number(chapter_str):
    """Return the leading number of a chapter token as a float, or 0 if none."""
    leading = _LEADING_NUMBER_PATTERN.match(chapter_str)
    return float(leading.group(0)) if leading else 0


def _manga_record(filename, manga_name, volume, chapter_str, title="", group=""):
    """Build the result dictionary returned by parse_manga_filename()."""
    return {
        'manga_name': manga_name,
        'volume': volume,
        'chapter': _chapter_number(chapter_str),
        'chapter_str': chapter_str,
        'title': title,
        'group': group,
        'filename': filename,
    }


def parse_manga_filename(filename):
    """Extract volume, chapter and title information from a manga filename.

    Only the base name of ``filename`` is parsed, after it has been passed
    through ``normalize_filename``.  The following layouts are tried in order:

    1. ``Manga Name v## c### [optional title] [optional group].cbz``
    2. ``Vol. XX Ch. YYY - Title.cbz`` (manga name is the parent directory name)
    3. ``Manga_Name_vXXcYY_(ScanGroup)[tag].cbz``
    4. ``Manga Name Vol XX, YYYcTitle.cbz``

    Args:
        filename: Path or bare name of the archive.

    Returns:
        A dict with the keys ``manga_name``, ``volume`` (int), ``chapter``
        (the leading number of the chapter token as a float), ``chapter_str``
        (the chapter token as written in the name), ``title``, ``group`` and
        ``filename`` (the argument exactly as passed in), or ``None`` when no
        layout matches.  When nothing matches and the path exists on disk, a
        warning line is printed.
    """
    base_filename = os.path.basename(filename)
    normalized_filename = normalize_filename(base_filename)

    # Layout 1: space separated, with optional title and trailing [group].
    match = _SPACED_PATTERN.match(normalized_filename)
    if match:
        manga_name, volume, chapter_str, rest = match.groups()
        rest = rest.strip()
        group_match = _TRAILING_GROUP_PATTERN.search(rest)
        if group_match:
            group = group_match.group(1)
            # Cut at the bracket the regex actually matched; a fixed
            # "position of '[' minus one" breaks when the bracket is the
            # first character or is not preceded by a space.
            title = rest[:group_match.start()].strip()
        else:
            group = ""
            title = rest
        return _manga_record(
            filename, manga_name.strip(), int(volume), chapter_str, title, group
        )

    # Layout 2: "Vol. XX Ch. YYY - Title.cbz".
    match = _VOL_CH_PATTERN.match(normalized_filename)
    if match:
        volume, chapter_str, title = match.groups(default="")
        # The file name carries no series name, so use the containing
        # directory's name instead.
        manga_name = os.path.basename(os.path.dirname(filename))
        return _manga_record(filename, manga_name, int(volume), chapter_str, title)

    # Layout 3: underscore separated with optional (publisher) and [tag].
    match = _UNDERSCORE_PATTERN.match(normalized_filename)
    if match:
        raw_name, volume, chapter_str, publisher, tag = match.groups(default="")
        manga_name = raw_name.replace('_', ' ').strip()

        # Special case for volume titles in the Dark Horse TPB naming scheme.
        # NOTE(review): names such as
        # "Shadow_Star_v04_-_Nothing_But_the_Truth_(Dark_Horse_TPB)[m-s].cbz"
        # have no "cYY" token and therefore do not match the underscore
        # pattern at all - confirm whether that format needs its own pattern.
        title = ""
        if '_-_' in normalized_filename and '(Dark_Horse_TPB)' in normalized_filename:
            title_match = re.search(r'_v\d+_-_(.+?)_\(', normalized_filename)
            if title_match:
                title = title_match.group(1).replace('_', ' ')

        # Combine publisher and tag when both are present, otherwise use
        # whichever one exists (or the empty string).
        group = f"{publisher} [{tag}]" if publisher and tag else publisher or tag
        return _manga_record(
            filename, manga_name, int(volume), chapter_str, title, group
        )

    # Layout 4: "Manga Name Vol XX, YYYcTitle.cbz".
    match = _COMMA_VOL_PATTERN.match(normalized_filename)
    if match:
        manga_name, volume, chapter_str, title = match.groups()
        return _manga_record(
            filename, manga_name.strip(), int(volume), chapter_str, title.strip()
        )

    if os.path.exists(filename):
        # For debugging: print the filename that couldn't be parsed
        print(f"WARNING: Could not parse filename: {base_filename}")

    return None