71 lines
2.5 KiB
Python
71 lines
2.5 KiB
Python
# File: /home/code/projects/manga-organizer-1/cbz-volume-combiner/cbz_volume_combiner/parsing.py
|
|
import os
|
|
import re
|
|
|
|
# Translation table from characters that commonly appear in badly encoded
# filenames to plain ASCII.  Every source character must be a distinct key:
# repeating a key in a dict literal silently keeps only the last value.
_FILENAME_CHAR_FIXES = str.maketrans({
    "?": "'",        # '?' left behind where an apostrophe failed to encode
    "\u2018": "'",   # left single quotation mark
    "\u2019": "'",   # right single quotation mark (typographic apostrophe)
    "\u201c": '"',   # left double quotation mark
    "\u201d": '"',   # right double quotation mark
    "\u2013": "-",   # en dash
    "\u2014": "-",   # em dash
    "\u00a0": " ",   # no-break space
})


def normalize_filename(filename: str) -> str:
    """Normalize a filename to handle encoding issues and special characters.

    Typographic quotes, dashes and no-break spaces are replaced by their
    ASCII counterparts, and a literal ``?`` is treated as an apostrophe that
    was lost to a bad encoding round-trip.

    Args:
        filename: The file name (or any string) to clean up.

    Returns:
        A copy of ``filename`` with the problematic characters replaced.
        Strings without any of those characters are returned unchanged.
    """
    # One pass over the string instead of one ``str.replace`` per character.
    return filename.translate(_FILENAME_CHAR_FIXES)
|
|
|
|
def parse_manga_filename(filename):
    """Extract volume, chapter and title information from a manga filename.

    The base name is expected to look like::

        <manga name> v<volume> c<chapter> [title] [[group]].cbz

    where the title and the trailing ``[group]`` tag are both optional.

    Args:
        filename: Path to, or bare name of, a ``.cbz`` file.  Only the base
            name is parsed; the value is returned untouched under the
            ``'filename'`` key.

    Returns:
        A dict with the keys ``manga_name`` (str), ``volume`` (int),
        ``chapter`` (float), ``chapter_str`` (the chapter text as written),
        ``title`` (str, may be empty), ``group`` (str, may be empty) and
        ``filename``; or ``None`` when the base name does not match the
        expected layout.
    """
    base_filename = os.path.basename(filename)

    # Clean up mis-encoded punctuation before matching.
    normalized_filename = normalize_filename(base_filename)

    # The "<whitespace><rest>" part is optional so that names without a title
    # or group (e.g. "Name v01 c001.cbz") are still recognised.
    pattern = r'(.*?)\s+v(\d+)\s+c(\d+[.\d-]*)(?:\s+(.*))?\.cbz$'
    match = re.match(pattern, normalized_filename)

    if match is None:
        if os.path.exists(filename):
            # For debugging: print the filename that couldn't be parsed
            print(f"WARNING: Could not parse filename: {base_filename}")
        return None

    manga_name = match.group(1).strip()
    volume = int(match.group(2))
    chapter_str = match.group(3)
    rest = (match.group(4) or "").strip()

    # The group is the LAST bracketed tag.  Excluding brackets inside the tag
    # keeps the regex and the title slice anchored on the same '['.
    group_match = re.search(r'\[([^\[\]]*)\]$', rest)
    if group_match:
        group = group_match.group(1)
        # Slice at the match start rather than "rfind('[') - 1", which ate a
        # title character (or wrapped around to -1) when no space preceded '['.
        title = rest[:group_match.start()].strip()
    else:
        group = ""
        title = rest

    # Handle chapter numbers like "005.5" or "005-006"
    try:
        chapter = float(chapter_str)
    except ValueError:
        try:
            # Take the first number for ranges
            chapter = float(chapter_str.split('-')[0])
        except ValueError:
            # Fallback for unparseable chapter numbers; kept a float so the
            # 'chapter' value has one type on every path.
            chapter = 0.0

    return {
        'manga_name': manga_name,
        'volume': volume,
        'chapter': chapter,
        'chapter_str': chapter_str,
        'title': title,
        'group': group,
        'filename': filename,
    }