376 lines
11 KiB
Python
376 lines
11 KiB
Python
#!/usr/bin/env python3
"""
PDF Edge Detection with Color Grouping (Preserving Edge Segregation)

Input:  input.pdf (looked up relative to the current working directory)
Output: output_sobel/ folder (deleted and recreated on every run)
"""
|
|
|
|
import cv2
|
|
import numpy as np
|
|
from pdf2image import convert_from_path
|
|
import os
|
|
import shutil
|
|
from collections import Counter
|
|
|
|
|
|
def clear_output_directory(output_dir):
    """Leave ``output_dir`` as an existing, empty directory.

    Whatever tree currently lives at ``output_dir`` is deleted, then the
    directory (and any missing parents) is created again.
    """
    stale_output_present = os.path.exists(output_dir)
    if stale_output_present:
        # Remove results of a previous run so old pages never mix with new ones.
        shutil.rmtree(output_dir)

    os.makedirs(output_dir, exist_ok=True)
|
|
|
|
|
|
def enhance_pastel_colors(image_bgr):
    """Make pastel colors more vivid and push gray pixels toward black.

    The image is converted to HSV and two rules are applied:

    * pastel pixels (V > 150 and 10 < S < 100) get their saturation
      multiplied by 2.5, clipped to 255;
    * gray pixels (S <= 10) get their value multiplied by 0.3.

    Note that the gray rule has no brightness condition, so an unsaturated
    white background is darkened as well.

    Args:
        image_bgr: BGR image (presumably uint8, as produced by the caller).

    Returns:
        The adjusted image, converted back to uint8 BGR.
    """
    hue, sat, val = cv2.split(
        cv2.cvtColor(image_bgr, cv2.COLOR_BGR2HSV).astype(np.float32)
    )

    # Both selections are taken from the untouched channels, before any
    # adjustment is applied.
    is_gray = sat <= 10
    is_pastel = (sat > 10) & (sat < 100) & (val > 150)

    sat = np.where(is_pastel, np.clip(sat * 2.5, 0, 255), sat)
    val = np.where(is_gray, np.clip(val * 0.3, 0, 255), val)

    adjusted_hsv = cv2.merge([hue, sat, val]).astype(np.uint8)
    return cv2.cvtColor(adjusted_hsv, cv2.COLOR_HSV2BGR)
|
|
|
|
|
|
def sobel_edge_detection(image):
    """Extract colored edges from a BGR image using a Sobel filter.

    Steps:
      1. Quantize the image in HSV space (hue in steps of 5, saturation and
         value in steps of 64) so that small color variations do not
         register as edges.
      2. Compute the Sobel gradient magnitude of each B, G, R channel of the
         quantized image and combine the three magnitudes.
      3. Normalize by the maximum, threshold at 0.10 and apply a 2x2
         morphological close to the binary mask.
      4. Copy the colors of ``image`` onto the edge pixels of a BGRA canvas.

    Args:
        image: BGR image to analyze.

    Returns:
        A uint8 BGRA image with the same height and width as ``image``.
        Edge pixels carry the color from ``image`` and alpha 255; all other
        pixels are zero. Edge pixels whose B, G and R are all above 240 are
        made transparent.
    """

    def gradient_magnitude(channel):
        # Magnitude of the horizontal and vertical 3x3 Sobel responses.
        grad_x = cv2.Sobel(channel, cv2.CV_64F, 1, 0, ksize=3)
        grad_y = cv2.Sobel(channel, cv2.CV_64F, 0, 1, ksize=3)
        return np.sqrt(grad_x**2 + grad_y**2)

    # Coarse color quantization in HSV space.
    hue, sat, val = cv2.split(cv2.cvtColor(image, cv2.COLOR_BGR2HSV))
    coarse_hsv = cv2.merge([(hue // 5) * 5, (sat // 64) * 64, (val // 64) * 64])
    coarse_bgr = cv2.cvtColor(coarse_hsv, cv2.COLOR_HSV2BGR)

    # Per-channel gradients, then one combined strength map scaled to [0, 1].
    blue_mag, green_mag, red_mag = (
        gradient_magnitude(channel) for channel in cv2.split(coarse_bgr)
    )
    strength = np.sqrt(blue_mag**2 + green_mag**2 + red_mag**2)
    strength = strength / (strength.max() + 1e-8)  # epsilon guards a flat image

    binary = (strength > 0.10).astype(np.uint8) * 255
    binary = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, np.ones((2, 2), np.uint8))

    # Paint the input colors onto a transparent canvas wherever an edge was found.
    on_edge = binary > 0
    height, width = image.shape[0], image.shape[1]
    result = np.zeros((height, width, 4), dtype=np.uint8)
    result[on_edge, :3] = image[on_edge]
    result[on_edge, 3] = 255

    # Near-white edge pixels are treated as background and hidden.
    near_white = np.all(result[:, :, :3] > 240, axis=2)
    result[near_white, 3] = 0

    return result
|
|
|
|
|
|
def analyze_edge_colors(edge_img, edge_mask):
    """Decide whether one edge contains several distinct colors.

    Near-white pixels (B, G and R all above 240) and weakly saturated pixels
    (S <= 30) are ignored. The remaining pixels are histogrammed by hue and
    every hue bin holding more than 5% of them counts as one color.

    Hue bins are 10 OpenCV hue units wide (20 degrees, since 8-bit OpenCV
    hue is degrees / 2 in the range 0..179) and are centered on multiples of
    10 with wrap-around: bin ``i`` covers hues ``10*i - 5 .. 10*i + 4``
    modulo 180. Centering keeps red, whose hue wraps from 179 back to 0,
    inside a single bin instead of splitting it between the first and last
    bin and reporting one red edge as two colors.

    Args:
        edge_img: BGRA image (presumably uint8); only the first three
            channels are read.
        edge_mask: Boolean mask with the image's height and width selecting
            the pixels of the edge.

    Returns:
        ``(has_multiple_colors, num_colors, dominant_hues)`` where
        ``dominant_hues`` is the list of significant hue-bin indices.
        ``(False, 0, [])`` is returned when fewer than 10 usable pixels
        remain after either filter.
    """
    pixels = edge_img[:, :, :3][edge_mask]

    # Drop near-white pixels.
    non_white = pixels[~np.all(pixels > 240, axis=1)]
    if len(non_white) < 10:
        return False, 0, []

    # cvtColor needs an image-shaped array, so treat the pixels as an N x 1 image.
    hsv = cv2.cvtColor(non_white.reshape(-1, 1, 3), cv2.COLOR_BGR2HSV).reshape(-1, 3)

    # Gray pixels have no meaningful hue; keep only saturated ones.
    saturated_hsv = hsv[hsv[:, 1] > 30]
    if len(saturated_hsv) < 10:
        return False, 0, []

    # Shift by half a bin and wrap at 180 so bins are centered and circular.
    # Widen to int32 first so the arithmetic cannot overflow uint8.
    hue = saturated_hsv[:, 0].astype(np.int32)
    hue_bins = ((hue + 5) % 180) // 10

    unique_hues, counts = np.unique(hue_bins, return_counts=True)

    # A bin is significant when it holds more than 5% of the saturated pixels.
    total = len(hue_bins)
    significant_hues = unique_hues[counts > (total * 0.05)]
    num_colors = len(significant_hues)

    return num_colors > 1, num_colors, significant_hues.tolist()
|
|
|
|
|
|
def split_edge_by_color(edges_bgra, edge_mask, labels, edge_id, num_colors):
    """Break one edge into ``num_colors`` sub-edges with K-means clustering.

    Near-white pixels (B, G and R all above 240) are discarded, the rest are
    clustered on hue and saturation, and every cluster is drawn onto its own
    blank BGRA canvas.

    Args:
        edges_bgra: BGRA edge image holding all edges.
        edge_mask: Boolean mask selecting the pixels of the edge to split.
        labels: Not used by this function.
        edge_id: Not used by this function.
        num_colors: Number of clusters to request from K-means.

    Returns:
        List of ``(sub_edge_image, cluster_id)`` tuples, one per cluster.
        When fewer than 10 non-white pixels are available the edge is not
        split and a single ``(edge_image, 0)`` entry containing every masked
        pixel is returned.
    """
    # Coordinates and colors of the edge pixels, in matching row-major order.
    rows, cols = np.nonzero(edge_mask)
    colors = edges_bgra[:, :, :3][edge_mask]

    keep = ~np.all(colors > 240, axis=1)
    kept_colors = colors[keep]
    kept_rows = rows[keep]
    kept_cols = cols[keep]

    if len(kept_colors) < 10:
        # Too little data to cluster: hand back the edge untouched.
        whole_edge = np.zeros_like(edges_bgra)
        whole_edge[edge_mask] = edges_bgra[edge_mask]
        return [(whole_edge, 0)]

    # Cluster on hue and saturation only; brightness is ignored.
    hsv = cv2.cvtColor(kept_colors.reshape(-1, 1, 3), cv2.COLOR_BGR2HSV).reshape(-1, 3)
    samples = hsv[:, :2].astype(np.float32)

    stop_rule = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
    _, assignment, _ = cv2.kmeans(
        samples, num_colors, None, stop_rule, 3, cv2.KMEANS_PP_CENTERS
    )
    assignment = assignment.flatten()

    def render_cluster(cluster_id):
        # Draw only this cluster's pixels on a fresh canvas so sub-edges stay separate.
        member = assignment == cluster_id
        member_rows = kept_rows[member]
        member_cols = kept_cols[member]
        canvas = np.zeros_like(edges_bgra)
        canvas[member_rows, member_cols] = edges_bgra[member_rows, member_cols]
        return canvas, cluster_id

    return [render_cluster(cluster_id) for cluster_id in range(num_colors)]
|
|
|
|
|
|
def get_edge_mode_color(edge_img, edge_mask):
    """Return the most frequent color among an edge's pixels.

    Near-white pixels (B, G and R all above 240) are ignored. Among the
    rest, only pixels with saturation above 30 are considered; if none are
    saturated, all non-white pixels are used instead.

    Args:
        edge_img: BGRA image (presumably uint8); only the first three
            channels are read.
        edge_mask: Boolean mask with the image's height and width selecting
            the pixels of the edge.

    Returns:
        A length-3 uint8 array ``[B, G, R]`` holding the mode color, or
        ``None`` when the edge has no non-white pixels. If several colors
        tie, the one with the smallest packed value (R most significant,
        then G, then B) wins.
    """
    pixels = edge_img[:, :, :3][edge_mask]

    # Drop near-white pixels.
    non_white = pixels[~np.all(pixels > 240, axis=1)]
    if len(non_white) == 0:
        return None

    # cvtColor needs an image-shaped array, so treat the pixels as an N x 1 image.
    hsv = cv2.cvtColor(non_white.reshape(-1, 1, 3), cv2.COLOR_BGR2HSV).reshape(-1, 3)

    # Prefer saturated pixels; fall back to everything for an all-gray edge.
    saturated_pixels = non_white[hsv[:, 1] > 30]
    if len(saturated_pixels) == 0:
        saturated_pixels = non_white

    # Pack each color into one 24-bit integer so it can be counted as a scalar.
    channels = saturated_pixels.astype(np.int32)
    packed = channels[:, 0] | (channels[:, 1] << 8) | (channels[:, 2] << 16)

    # np.unique counts only the colors that occur. np.bincount would allocate
    # a counter for every value up to the largest packed color (as many as
    # 2**24 entries) on each call. Unique values come back sorted, so argmax
    # keeps the same tie-break as bincount: the smallest packed value.
    values, counts = np.unique(packed, return_counts=True)
    mode_int = int(values[counts.argmax()])

    return np.array(
        [mode_int & 0xFF, (mode_int >> 8) & 0xFF, (mode_int >> 16) & 0xFF],
        dtype=np.uint8,
    )
|
|
|
|
|
|
def process_and_group_edges(edges_bgra, color_threshold=30):
    """Split multi-color edges, then bundle edges of similar color.

    Connected components of the alpha channel are treated as individual
    edges. An edge that ``analyze_edge_colors`` reports as multi-colored is
    split with ``split_edge_by_color``. Every resulting edge gets a mode
    color, and edges are grouped greedily: the first ungrouped edge seeds a
    group and absorbs every later ungrouped edge whose mode color lies
    within ``color_threshold`` (Euclidean distance in BGR) of the seed.
    Edges without a mode color are left out of all groups.

    Each edge is held as its own full-size BGRA image, so memory use grows
    with the number of edges.

    Args:
        edges_bgra: BGRA edge image; alpha > 0 marks edge pixels.
        color_threshold: Maximum BGR distance to the seed color for an edge
            to join a group.

    Returns:
        List of ``(group_image, mode_color, edge_count)`` tuples, where
        ``mode_color`` is the seed edge's mode color. Empty when the image
        contains no edges.
    """
    num_labels, labels = cv2.connectedComponents(edges_bgra[:, :, 3])

    print(f" Found {num_labels - 1} edges")

    if num_labels <= 1:
        return []

    # Stage 1: one image per edge, splitting edges that mix several colors.
    all_edge_images = []
    for edge_id in range(1, num_labels):  # label 0 is the background
        edge_mask = labels == edge_id
        if not edge_mask.any():
            continue

        has_multiple, num_colors, _ = analyze_edge_colors(edges_bgra, edge_mask)

        if not has_multiple:
            single_edge = np.zeros_like(edges_bgra)
            single_edge[edge_mask] = edges_bgra[edge_mask]
            all_edge_images.append((single_edge, 0))
            continue

        print(f" Edge {edge_id}: {num_colors} colors detected, splitting...")
        all_edge_images.extend(
            split_edge_by_color(edges_bgra, edge_mask, labels, edge_id, num_colors)
        )

    print(f" Total edges after splitting: {len(all_edge_images)}")

    # Stage 2: representative color of every edge (None if it has no usable pixels).
    edge_colors = [
        get_edge_mode_color(picture, picture[:, :, 3] > 0)
        for picture, _ in all_edge_images
    ]

    # Stage 3: greedy grouping around seed colors.
    groups = []
    claimed = [False] * len(edge_colors)

    for i, mode_color in enumerate(edge_colors):
        if mode_color is None or claimed[i]:
            continue

        claimed[i] = True
        members = [i]
        seed = mode_color.astype(float)

        # Earlier indices are all claimed or colorless, so only later ones can join.
        for j in range(i + 1, len(edge_colors)):
            other_color = edge_colors[j]
            if claimed[j] or other_color is None:
                continue

            if np.linalg.norm(seed - other_color.astype(float)) <= color_threshold:
                claimed[j] = True
                members.append(j)

        # Draw every member onto one canvas; pixels are copied as-is, not merged.
        group_img = np.zeros_like(edges_bgra)
        for idx in members:
            picture = all_edge_images[idx][0]
            visible = picture[:, :, 3] > 0
            group_img[visible] = picture[visible]

        groups.append((group_img, mode_color, len(members)))

    print(f" Grouped into {len(groups)} color groups")

    return groups
|
|
|
|
|
|
def process_pdf(pdf_path, output_dir, dpi=200):
    """Run the edge pipeline on every page of a PDF and save the results.

    ``output_dir`` is emptied first. Each page is rendered at ``dpi``,
    pastel-enhanced, edge-detected and color-grouped. For page ``N`` the
    files ``pageNNN_original.png``, ``pageNNN_enhanced.png``,
    ``pageNNN_edges.png`` and one ``pageNNN_group<k>.png`` per color group
    are written to ``output_dir``. Progress is printed to stdout.

    Args:
        pdf_path: Path of the PDF to process.
        output_dir: Directory that receives the PNG files.
        dpi: Rendering resolution passed to ``convert_from_path``.
    """
    clear_output_directory(output_dir)

    print(f"Processing PDF: {pdf_path}")
    print(f"Converting at {dpi} DPI...\n")

    images = convert_from_path(pdf_path, dpi=dpi)
    print(f"Total pages: {len(images)}\n")

    for page_num, pil_image in enumerate(images, start=1):
        print(f"Page {page_num}/{len(images)}...")

        # The rendered page is converted from RGB to OpenCV's BGR channel order.
        image = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)

        print(f" - Enhancing pastel colors...")
        enhanced_image = enhance_pastel_colors(image)

        print(f" - Detecting edges...")
        edges = sobel_edge_detection(enhanced_image)

        print(f" - Processing and grouping edges by color...")
        groups = process_and_group_edges(edges, color_threshold=30)

        # Intermediate stages are saved alongside the groups.
        base = f"page{page_num:03d}"
        stage_outputs = (
            (f"{base}_original.png", image),
            (f"{base}_enhanced.png", enhanced_image),
            (f"{base}_edges.png", edges),
        )
        for filename, picture in stage_outputs:
            cv2.imwrite(os.path.join(output_dir, filename), picture)

        # One file per color group, numbered from 1.
        for group_idx, group in enumerate(groups, start=1):
            group_img, mode_color, edge_count = group
            cv2.imwrite(
                os.path.join(output_dir, f"{base}_group{group_idx}.png"), group_img
            )
            print(f" Group {group_idx}: {edge_count} edges, mode color (BGR): {tuple(mode_color)}")

        print(f" - Saved {len(groups)} group images\n")

    print("Complete!")
|
|
|
|
|
|
def main():
    """Process ``input.pdf`` into ``output_sobel`` and report a status code.

    Returns:
        0 when processing finishes, 1 when the input file is missing or
        processing raises an exception (the traceback is printed).
    """
    pdf_path = "input.pdf"
    output_dir = "output_sobel"

    input_available = os.path.exists(pdf_path)
    if not input_available:
        print(f"Error: '{pdf_path}' not found!")
        return 1

    try:
        process_pdf(pdf_path, output_dir)
    except Exception as e:
        # Top-level boundary: report the failure and turn it into an exit status.
        print(f"Error: {e}")
        import traceback

        traceback.print_exc()
        return 1
    else:
        return 0
|
|
|
|
|
|
if __name__ == "__main__":
    # Use main()'s return value as the process exit status. SystemExit is
    # raised directly because the bare ``exit`` helper is installed by the
    # ``site`` module for interactive sessions and is not guaranteed to
    # exist (for example under ``python -S``).
    raise SystemExit(main())