Files
cbz-volume-combiner/cbz_volume_combiner/parsing.py
2025-03-21 23:04:35 +00:00

71 lines
2.5 KiB
Python

# File: /home/code/projects/manga-organizer-1/cbz-volume-combiner/cbz_volume_combiner/parsing.py
import os
import re
# Single-character substitutions applied by normalize_filename().
# Every non-ASCII key is written as a \u escape so the table cannot be
# silently corrupted (e.g. collapsed into duplicate '?' keys) when this file
# is saved or transferred with the wrong encoding.
# NOTE(review): the typographic code points are inferred from the description
# of each entry; confirm them against the filenames seen in practice.
_FILENAME_CHAR_MAP = str.maketrans({
    "?": "'",       # '?' left behind where an apostrophe failed to decode
    "\u2018": "'",  # left single quotation mark
    "\u2019": "'",  # right single quotation mark (typographic apostrophe)
    "\u201c": '"',  # left double quotation mark
    "\u201d": '"',  # right double quotation mark
    "\u2013": "-",  # en dash
    "\u2014": "-",  # em dash
    "\u00a0": " ",  # no-break space
})


def normalize_filename(filename: str) -> str:
    """Normalize a filename to handle encoding issues and special characters.

    Typographic quotes, dashes and no-break spaces are replaced by their
    plain ASCII equivalents, and a literal ``?`` is treated as an apostrophe
    that was lost to a bad decode. All other characters are left untouched.

    Args:
        filename: The filename (or any string) to normalize.

    Returns:
        A new string with every character listed in ``_FILENAME_CHAR_MAP``
        replaced. Each replacement is one character long, so the length of
        the string is unchanged.
    """
    # One C-level pass over the string instead of one .replace() per entry.
    return filename.translate(_FILENAME_CHAR_MAP)
def parse_manga_filename(filename: str):
    """Extract volume, chapter, title and group information from a manga filename.

    The basename is expected to look like::

        <manga name> v<NN> c<NNN> [title] [[group]].cbz

    where the title and the trailing ``[group]`` tag are both optional.
    The basename is passed through ``normalize_filename`` before matching.

    Args:
        filename: Path (or bare name) of the ``.cbz`` file.

    Returns:
        ``None`` when the basename does not match the expected layout.
        Otherwise a dict with the keys:

        * ``manga_name`` - text before the volume marker, stripped.
        * ``volume`` - volume number as ``int``.
        * ``chapter`` - chapter number as ``float``; for a range such as
          ``005-006`` the first number is used, and ``0.0`` is used when the
          chapter text cannot be converted.
        * ``chapter_str`` - the chapter text exactly as matched.
        * ``title`` - text between the chapter and the group tag (may be "").
        * ``group`` - contents of the final ``[...]`` tag (may be "").
        * ``filename`` - the ``filename`` argument, unchanged.
    """
    base_filename = os.path.basename(filename)
    # Normalize first so typographic punctuation does not defeat the pattern.
    normalized_filename = normalize_filename(base_filename)

    # The "<whitespace><title/group>" tail is optional so that a bare
    # "Name v01 c001.cbz" is accepted as well.
    pattern = r'(.*?)\s+v(\d+)\s+c(\d+[.\d-]*)(?:\s+(.*))?\.cbz$'
    match = re.match(pattern, normalized_filename)
    if match is None:
        if os.path.exists(filename):
            # For debugging: report real files whose names could not be parsed.
            print(f"WARNING: Could not parse filename: {base_filename}")
        return None

    manga_name = match.group(1).strip()
    volume = int(match.group(2))
    chapter_str = match.group(3)
    # group(4) is None when the optional tail is absent.
    rest = (match.group(4) or "").strip()

    # Only the final bracketed tag is the group. Excluding brackets from the
    # capture keeps an earlier "[...]" in the title from being swallowed.
    group_match = re.search(r'\[([^\[\]]*)\]$', rest)
    if group_match:
        group = group_match.group(1)
        # Slice at the start of the matched tag so the title is correct even
        # when there is no title at all or no space before the bracket.
        title = rest[:group_match.start()].strip()
    else:
        group = ""
        title = rest

    # Handle chapter numbers like "005.5" or "005-006": the text before the
    # first '-' is the whole string for a plain number and the first number
    # for a range.
    try:
        chapter = float(chapter_str.partition('-')[0])
    except ValueError:
        chapter = 0.0  # Fallback for unparseable chapter numbers

    return {
        'manga_name': manga_name,
        'volume': volume,
        'chapter': chapter,
        'chapter_str': chapter_str,
        'title': title,
        'group': group,
        'filename': filename,
    }