1262 lines
53 KiB
Python
1262 lines
53 KiB
Python
import boto3
|
|
import json
|
|
import os
|
|
from PIL import Image, ImageDraw
|
|
import numpy as np
|
|
from scipy.optimize import linear_sum_assignment
|
|
import re
|
|
|
|
# Configuration

# AWS region used for both the Textract and Rekognition clients.
REGION = 'us-east-1'

# ARN of the trained Rekognition Custom Labels model version used for object
# detection on diagram segments.
# NOTE(review): this hard-codes an account id and a specific model version --
# consider loading it from environment/config instead of source.
CUSTOM_LABELS_PROJECT_ARN = 'arn:aws:rekognition:us-east-1:173378533286:project/labels-valvula/version/labels-valvula.2025-11-24T15.44.16/1764009856090'

CONFIDENCE_THRESHOLD = 80  # Minimum confidence for custom labels detection
|
class DiagramProcessor:
    """Process a single diagram: segment, detect text, clean, and recognize objects"""

    def __init__(self, region=REGION, custom_labels_arn=CUSTOM_LABELS_PROJECT_ARN):
        """Create the AWS service clients used throughout the pipeline.

        Args:
            region: AWS region for both service clients.
            custom_labels_arn: ARN of the Rekognition Custom Labels model version.
        """
        self.region = region
        self.custom_labels_arn = custom_labels_arn
        # One client per service; both are reused for every segment.
        self.textract_client = boto3.client('textract', region_name=region)
        self.rekognition_client = boto3.client('rekognition', region_name=region)
|
def segment_image(self, image_path, output_dir, grid_size=(5, 5), overlap_percent=10):
    """
    Segment an image into a grid with overlap.

    Args:
        image_path: Path to input diagram image
        output_dir: Directory to save segments
        grid_size: Tuple (rows, cols) for grid dimensions
        overlap_percent: Percentage of overlap between segments (0-100)

    Returns:
        List of tuples: [(segment_path, position_info), ...] where
        position_info holds the segment's pixel rectangle in the original image.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Load image
    img = Image.open(image_path)
    img_width, img_height = img.size

    rows, cols = grid_size

    # Nominal segment size (before overlap is applied)
    overlap_factor = overlap_percent / 100.0
    segment_width = img_width / cols
    segment_height = img_height / rows

    # Step between segment origins; smaller than the segment size when
    # overlap_percent > 0 so neighbouring segments share a border region.
    step_width = segment_width * (1 - overlap_factor)
    step_height = segment_height * (1 - overlap_factor)

    segments = []

    print(f"\nSegmenting image: {image_path}")
    print(f"Image size: {img_width}x{img_height}")
    print(f"Grid: {rows}x{cols} with {overlap_percent}% overlap")

    for row in range(rows):
        for col in range(cols):
            # Calculate segment boundaries
            left = int(col * step_width)
            top = int(row * step_height)

            # BUGFIX: with overlap > 0 the last row/column previously ended at
            # left + segment_width < img_width, leaving an unprocessed strip
            # along the right/bottom edges (~8% of the image with defaults).
            # Anchor the final row/column to the image boundary instead,
            # keeping the segment size constant.
            if col == cols - 1:
                right = img_width
                left = max(0, int(img_width - segment_width))
            else:
                right = int(min(left + segment_width, img_width))

            if row == rows - 1:
                bottom = img_height
                top = max(0, int(img_height - segment_height))
            else:
                bottom = int(min(top + segment_height, img_height))

            # Crop segment
            segment = img.crop((left, top, right, bottom))

            # Save segment
            segment_filename = f"segment_{row}_{col}.png"
            segment_path = os.path.join(output_dir, segment_filename)
            segment.save(segment_path)

            # Store segment info (pixel coordinates in the original image)
            position_info = {
                'row': row,
                'col': col,
                'left': left,
                'top': top,
                'right': right,
                'bottom': bottom,
                'width': right - left,
                'height': bottom - top
            }

            segments.append((segment_path, position_info))

            print(f"  Created: {segment_filename} at position ({row}, {col})")

    print(f"\nTotal segments created: {len(segments)}")
    return segments
|
def detect_text_segment(self, segment_path):
|
|
"""
|
|
Detect text in a segment using Textract
|
|
|
|
Args:
|
|
segment_path: Path to segment image
|
|
|
|
Returns:
|
|
Textract results dictionary
|
|
"""
|
|
with open(segment_path, 'rb') as image_file:
|
|
image_bytes = image_file.read()
|
|
|
|
result = self.textract_client.detect_document_text(
|
|
Document={'Bytes': image_bytes}
|
|
)
|
|
|
|
return result
|
|
|
|
def clean_text_from_segment(self, segment_path, textract_data, output_path,
                            shrink_percent=8.5, keep_regex_list=None, min_confidence=80):
    """
    Remove text from a segment by painting white boxes over detected words.

    Args:
        segment_path: Path to input segment
        textract_data: Textract results
        output_path: Path to save cleaned segment
        shrink_percent: Percentage to shrink bounding boxes
        keep_regex_list: List of regex patterns to keep
        min_confidence: Minimum confidence to remove text

    Returns:
        Statistics dictionary with 'words_removed' and 'words_kept'
    """
    # Compile keep patterns up front; invalid patterns are skipped with a warning.
    compiled_patterns = []
    if keep_regex_list:
        for pattern in keep_regex_list:
            try:
                compiled_patterns.append(re.compile(pattern))
            except re.error as e:
                print(f"Warning: Invalid regex pattern '{pattern}': {e}")

    # Load image
    img = Image.open(segment_path)
    width, height = img.size
    draw = ImageDraw.Draw(img)

    words_removed = 0
    words_kept = 0

    # Process each word
    for block in textract_data['Blocks']:
        if block['BlockType'] != 'WORD':
            continue

        text = block['Text']
        confidence = block['Confidence']

        # Keep low-confidence words (likely diagram symbols, not text) and
        # words matching any keep pattern.
        # BUGFIX: words_kept was previously incremented twice when a word was
        # both below min_confidence AND matched a keep pattern; each kept word
        # is now counted exactly once.
        should_keep = (
            confidence < min_confidence
            or any(p.match(text) for p in compiled_patterns)
        )
        if should_keep:
            words_kept += 1
            continue

        # Remove text: paint a white rectangle over its bounding box.
        # Textract geometry is normalized (0-1); convert to pixels.
        bbox = block['Geometry']['BoundingBox']
        left = int(bbox['Left'] * width)
        top = int(bbox['Top'] * height)
        box_width = int(bbox['Width'] * width)
        box_height = int(bbox['Height'] * height)

        # Shrink the box slightly so diagram linework touching the text
        # bounding box is not clipped.
        if shrink_percent > 0:
            shrink_factor = shrink_percent / 100
            width_reduction = int(box_width * shrink_factor / 2)
            height_reduction = int(box_height * shrink_factor / 2)

            left += width_reduction
            top += height_reduction
            box_width -= width_reduction * 2
            box_height -= height_reduction * 2

        # Draw white rectangle
        draw.rectangle(
            [(left, top), (left + box_width, top + box_height)],
            fill='white'
        )
        words_removed += 1

    # Save cleaned image
    img.save(output_path)

    return {
        'words_removed': words_removed,
        'words_kept': words_kept
    }
|
def recognize_objects_segment(self, segment_path, min_confidence=CONFIDENCE_THRESHOLD):
|
|
"""
|
|
Recognize objects in a cleaned segment using Custom Labels
|
|
|
|
Args:
|
|
segment_path: Path to cleaned segment
|
|
min_confidence: Minimum confidence threshold
|
|
|
|
Returns:
|
|
Dictionary with detection results
|
|
"""
|
|
with open(segment_path, 'rb') as image_file:
|
|
image_bytes = image_file.read()
|
|
|
|
try:
|
|
response = self.rekognition_client.detect_custom_labels(
|
|
ProjectVersionArn=self.custom_labels_arn,
|
|
Image={'Bytes': image_bytes},
|
|
MinConfidence=min_confidence
|
|
)
|
|
|
|
return {
|
|
'custom_labels': response.get('CustomLabels', []),
|
|
'success': True
|
|
}
|
|
except Exception as e:
|
|
print(f"Error detecting custom labels: {e}")
|
|
return {
|
|
'custom_labels': [],
|
|
'success': False,
|
|
'error': str(e)
|
|
}
|
|
|
|
def calculate_iou(self, box1, box2):
|
|
"""
|
|
Calculate Intersection over Union (IoU) between two bounding boxes
|
|
|
|
Args:
|
|
box1, box2: Bounding boxes in global coordinates {left, top, right, bottom}
|
|
|
|
Returns:
|
|
IoU value (0 to 1)
|
|
"""
|
|
# Calculate intersection
|
|
x_left = max(box1['left'], box2['left'])
|
|
y_top = max(box1['top'], box2['top'])
|
|
x_right = min(box1['right'], box2['right'])
|
|
y_bottom = min(box1['bottom'], box2['bottom'])
|
|
|
|
if x_right < x_left or y_bottom < y_top:
|
|
return 0.0
|
|
|
|
intersection_area = (x_right - x_left) * (y_bottom - y_top)
|
|
|
|
# Calculate union
|
|
box1_area = (box1['right'] - box1['left']) * (box1['bottom'] - box1['top'])
|
|
box2_area = (box2['right'] - box2['left']) * (box2['bottom'] - box2['top'])
|
|
union_area = box1_area + box2_area - intersection_area
|
|
|
|
if union_area == 0:
|
|
return 0.0
|
|
|
|
return intersection_area / union_area
|
|
|
|
def merge_bounding_boxes(self, boxes):
|
|
"""
|
|
Merge multiple bounding boxes into one by computing their union
|
|
|
|
Args:
|
|
boxes: List of bounding box dicts with {left, top, right, bottom}
|
|
|
|
Returns:
|
|
Merged bounding box
|
|
"""
|
|
if not boxes:
|
|
return None
|
|
|
|
min_left = min(box['left'] for box in boxes)
|
|
min_top = min(box['top'] for box in boxes)
|
|
max_right = max(box['right'] for box in boxes)
|
|
max_bottom = max(box['bottom'] for box in boxes)
|
|
|
|
return {
|
|
'left': min_left,
|
|
'top': min_top,
|
|
'right': max_right,
|
|
'bottom': max_bottom,
|
|
'width': max_right - min_left,
|
|
'height': max_bottom - min_top
|
|
}
|
|
|
|
def deduplicate_detections(self, all_detections, iou_threshold=0.3):
|
|
"""
|
|
Remove duplicate detections across overlapping segments using Non-Maximum Suppression
|
|
|
|
Args:
|
|
all_detections: List of detection dicts with global coordinates
|
|
iou_threshold: IoU threshold for considering boxes as duplicates
|
|
|
|
Returns:
|
|
List of deduplicated detections
|
|
"""
|
|
if not all_detections:
|
|
return []
|
|
|
|
print(f"\n[DEDUPLICATION] Processing {len(all_detections)} detections...")
|
|
|
|
# Group detections by label name
|
|
detections_by_label = {}
|
|
for det in all_detections:
|
|
label = det['Name']
|
|
if label not in detections_by_label:
|
|
detections_by_label[label] = []
|
|
detections_by_label[label].append(det)
|
|
|
|
deduplicated = []
|
|
|
|
# Process each label separately
|
|
for label, detections in detections_by_label.items():
|
|
print(f"\n Processing label: {label} ({len(detections)} detections)")
|
|
|
|
# Sort by confidence (highest first)
|
|
detections = sorted(detections, key=lambda x: x['Confidence'], reverse=True)
|
|
|
|
# Group overlapping detections
|
|
groups = []
|
|
used = set()
|
|
|
|
for i, det in enumerate(detections):
|
|
if i in used:
|
|
continue
|
|
|
|
# Start a new group with this detection
|
|
group = [det]
|
|
used.add(i)
|
|
|
|
# Find all overlapping detections
|
|
for j, other_det in enumerate(detections):
|
|
if j in used or j == i:
|
|
continue
|
|
|
|
iou = self.calculate_iou(det['global_bbox'], other_det['global_bbox'])
|
|
|
|
if iou > iou_threshold:
|
|
group.append(other_det)
|
|
used.add(j)
|
|
|
|
groups.append(group)
|
|
|
|
print(f" Found {len(groups)} unique objects (merged from {len(detections)} detections)")
|
|
|
|
# Merge each group into a single detection
|
|
for group in groups:
|
|
if len(group) == 1:
|
|
# No duplicates, keep as is
|
|
deduplicated.append(group[0])
|
|
else:
|
|
# Merge multiple detections
|
|
merged_bbox = self.merge_bounding_boxes([d['global_bbox'] for d in group])
|
|
avg_confidence = sum(d['Confidence'] for d in group) / len(group)
|
|
|
|
merged_detection = {
|
|
'Name': label,
|
|
'Confidence': avg_confidence,
|
|
'global_bbox': merged_bbox,
|
|
'merged_from': len(group),
|
|
'source_segments': [d['segment_name'] for d in group]
|
|
}
|
|
|
|
deduplicated.append(merged_detection)
|
|
print(f" Merged {len(group)} detections into one")
|
|
|
|
print(f"\n[DEDUPLICATION] Complete: {len(all_detections)} -> {len(deduplicated)} detections")
|
|
|
|
return deduplicated
|
|
|
|
def get_bbox_center(self, bbox):
|
|
"""Get center point of bounding box"""
|
|
if 'left' in bbox:
|
|
center_x = bbox['left'] + bbox['width'] / 2
|
|
center_y = bbox['top'] + bbox['height'] / 2
|
|
else:
|
|
center_x = bbox['Left'] + bbox['Width'] / 2
|
|
center_y = bbox['Top'] + bbox['Height'] / 2
|
|
|
|
return (center_x, center_y)
|
|
|
|
def calculate_distance(self, center1, center2):
|
|
"""Calculate Euclidean distance between two centers"""
|
|
return np.sqrt(
|
|
(center1[0] - center2[0])**2 +
|
|
(center1[1] - center2[1])**2
|
|
)
|
|
|
|
def match_objects_to_text_hungarian(self, objects, all_text_detections,
                                    pattern_regex, max_distance=None,
                                    img_width=1.0, img_height=1.0):
    """
    Match deduplicated objects to VM-#### text labels using Hungarian algorithm

    Args:
        objects: List of deduplicated object detections with global_bbox
        all_text_detections: Combined text detections from all segments
        pattern_regex: Compiled regex pattern (e.g., r'VM-\d{4}')
        max_distance: Maximum allowed distance (in pixels)
        img_width: Image width
        img_height: Image height

    Returns:
        Dict with matches, unmatched_objects, unmatched_texts
    """
    # NOTE(review): img_width/img_height are not used anywhere in this method
    # -- presumably kept for signature symmetry with the caller; confirm
    # before removing them.

    # Filter text that matches VM-#### pattern
    matching_texts = []
    for text_data in all_text_detections:
        text = text_data['text']
        if pattern_regex.search(text):
            matching_texts.append(text_data)

    # Degenerate cases: one side of the assignment is empty, so nothing can
    # be matched and the other side is returned wholesale as unmatched.
    if not objects:
        print("\n[MATCHING] No objects to match")
        return {
            'matches': [],
            'unmatched_objects': [],
            'unmatched_texts': matching_texts
        }

    if not matching_texts:
        print("\n[MATCHING] No matching text found")
        return {
            'matches': [],
            'unmatched_objects': objects,
            'unmatched_texts': []
        }

    n_objects = len(objects)
    n_texts = len(matching_texts)

    print(f"\n{'='*80}")
    print(f"HUNGARIAN ALGORITHM MATCHING")
    print(f"{'='*80}")
    print(f"Objects to match: {n_objects}")
    print(f"Text labels (VM-####): {n_texts}")
    print(f"Max distance: {max_distance if max_distance else 'unlimited'} pixels")

    # Build cost matrix.
    # The matrix is padded to a square (max_dim x max_dim) because
    # linear_sum_assignment assigns every row; padding cells (and pairs
    # beyond max_distance) carry a prohibitive 1e10 cost and are filtered
    # out of the results below.
    max_dim = max(n_objects, n_texts)
    cost_matrix = np.full((max_dim, max_dim), 1e10)

    for i, obj in enumerate(objects):
        obj_bbox = obj['global_bbox']
        obj_center = self.get_bbox_center(obj_bbox)

        for j, text_data in enumerate(matching_texts):
            text_bbox = text_data['global_bbox']
            text_center = self.get_bbox_center(text_bbox)

            # Calculate distance (center-to-center, in pixels)
            distance = self.calculate_distance(obj_center, text_center)

            # Apply distance threshold
            if max_distance and distance > max_distance:
                cost_matrix[i, j] = 1e10
            else:
                cost_matrix[i, j] = distance

    # Solve assignment problem with Hungarian algorithm
    row_indices, col_indices = linear_sum_assignment(cost_matrix)

    # Build matches
    matches = []
    matched_obj_indices = set()
    matched_text_indices = set()

    for obj_idx, text_idx in zip(row_indices, col_indices):
        # Skip padding or high-cost (beyond max_distance) assignments
        if (obj_idx >= n_objects or text_idx >= n_texts or
                cost_matrix[obj_idx, text_idx] >= 1e10):
            continue

        distance = cost_matrix[obj_idx, text_idx]

        match = {
            'object': objects[obj_idx],
            'object_name': objects[obj_idx]['Name'],
            'object_bbox': objects[obj_idx]['global_bbox'],
            'object_confidence': objects[obj_idx]['Confidence'],
            'text': matching_texts[text_idx]['text'],
            'text_bbox': matching_texts[text_idx]['global_bbox'],
            'text_confidence': matching_texts[text_idx]['confidence'],
            'distance': distance,
            'distance_pixels': distance
        }

        matches.append(match)
        matched_obj_indices.add(obj_idx)
        matched_text_indices.add(text_idx)

        print(f"\n  ✓ Match: {objects[obj_idx]['Name']} -> {matching_texts[text_idx]['text']}")
        print(f"    Distance: {distance:.2f} pixels")
        print(f"    Object confidence: {objects[obj_idx]['Confidence']:.2f}%")
        print(f"    Text confidence: {matching_texts[text_idx]['confidence']:.2f}%")

    # Find unmatched items
    unmatched_objects = [
        objects[i] for i in range(n_objects)
        if i not in matched_obj_indices
    ]

    unmatched_texts = [
        matching_texts[j] for j in range(n_texts)
        if j not in matched_text_indices
    ]

    # Print summary
    print(f"\n{'='*80}")
    print(f"MATCHING SUMMARY")
    print(f"{'='*80}")
    print(f"Successful matches: {len(matches)}")
    print(f"Unmatched objects: {len(unmatched_objects)}")
    print(f"Unmatched VM-#### labels: {len(unmatched_texts)}")

    if unmatched_objects:
        print(f"\nUnmatched objects:")
        for obj in unmatched_objects:
            print(f"  - {obj['Name']} (confidence: {obj['Confidence']:.2f}%)")

    if unmatched_texts:
        print(f"\nUnmatched text labels:")
        for text_data in unmatched_texts:
            print(f"  - {text_data['text']} (confidence: {text_data['confidence']:.2f}%)")

    return {
        'matches': matches,
        'unmatched_objects': unmatched_objects,
        'unmatched_texts': unmatched_texts,
        'n_objects': n_objects,
        'n_texts': n_texts,
        # Fraction of the larger side that got matched (0 when both empty).
        'matching_rate': len(matches) / max(n_objects, n_texts) if max(n_objects, n_texts) > 0 else 0
    }
|
def match_objects_to_text_by_type(self, objects, all_text_detections,
                                  max_distance=200, img_width=1.0, img_height=1.0):
    """
    Match objects to text based on object type:
    - globo, gaveta, retencao, espera -> Match to VM-#### labels (using Hungarian algorithm)
    - sis_con_dist, instrumento_local -> Match to 2 text labels inside (top and bottom)
    - Other objects -> Match to single text inside their bounding box

    Args:
        objects: List of deduplicated object detections
        all_text_detections: List of deduplicated text detections
        max_distance: Maximum distance for VM-#### matching (pixels)
        img_width: Image width
        img_height: Image height

    Returns:
        Dict with matches, unmatched_objects, unmatched_texts
    """
    # Define which objects should match to VM-#### labels
    VM_LABEL_OBJECTS = ['globo', 'gaveta', 'retencao', 'espera']

    # Define which objects have 2 internal labels
    TWO_LABEL_OBJECTS = ['sis_con_dist', 'instrumento_local']

    # Separate objects by matching type
    vm_label_objects = []
    two_label_objects = []
    single_label_objects = []

    for obj in objects:
        obj_name = obj['Name'].lower()
        if obj_name in VM_LABEL_OBJECTS:
            vm_label_objects.append(obj)
        elif obj_name in TWO_LABEL_OBJECTS:
            two_label_objects.append(obj)
        else:
            single_label_objects.append(obj)

    print(f"\n{'='*80}")
    print(f"OBJECT-TEXT MATCHING BY TYPE")
    print(f"{'='*80}")
    print(f"Objects matching to VM-#### labels: {len(vm_label_objects)}")
    if vm_label_objects:
        print(f"  Types: {', '.join([obj['Name'] for obj in vm_label_objects])}")
    print(f"Objects with 2 internal labels: {len(two_label_objects)}")
    if two_label_objects:
        print(f"  Types: {', '.join([obj['Name'] for obj in two_label_objects])}")
    print(f"Objects with 1 internal label: {len(single_label_objects)}")
    if single_label_objects:
        print(f"  Types: {', '.join([obj['Name'] for obj in single_label_objects])}")

    # Separate text by type: VM-#### labels vs everything else
    vm_pattern = re.compile(r'VM-\d{4}')
    vm_texts = [t for t in all_text_detections if vm_pattern.search(t['text'])]
    other_texts = [t for t in all_text_detections if not vm_pattern.search(t['text'])]

    print(f"\nVM-#### labels available: {len(vm_texts)}")
    print(f"Other text available: {len(other_texts)}")

    all_matches = []
    all_unmatched_objects = []
    all_unmatched_texts = []
    # Track which texts have been used, keyed by object identity (id()).
    # This assumes the same text dict objects flow through Parts 2-4 --
    # the dicts are never copied, so identity is a stable key here.
    used_texts = set()

    # Part 1: Match VM-#### objects to VM-#### labels using Hungarian algorithm
    if vm_label_objects:
        print(f"\n{'='*80}")
        print(f"PART 1: Matching VM-#### label objects to VM-#### text")
        print(f"{'='*80}")

        vm_matching_results = self.match_objects_to_text_hungarian(
            objects=vm_label_objects,
            all_text_detections=vm_texts,
            pattern_regex=vm_pattern,
            max_distance=max_distance,
            img_width=img_width,
            img_height=img_height
        )

        # Add match type identifier
        for match in vm_matching_results['matches']:
            match['match_type'] = 'vm_label'

        all_matches.extend(vm_matching_results['matches'])
        all_unmatched_objects.extend(vm_matching_results['unmatched_objects'])
        all_unmatched_texts.extend(vm_matching_results['unmatched_texts'])

    # Part 2: Match objects with 2 internal labels (top and bottom text)
    if two_label_objects:
        print(f"\n{'='*80}")
        print(f"PART 2: Matching objects to 2 internal labels")
        print(f"{'='*80}")

        for obj in two_label_objects:
            obj_bbox = obj['global_bbox']
            obj_name = obj['Name']

            # Calculate object center
            obj_center_x = obj_bbox['left'] + obj_bbox['width'] / 2
            obj_center_y = obj_bbox['top'] + obj_bbox['height'] / 2

            # Find all text inside this object's bounding box
            texts_inside = []
            for text_data in other_texts:
                text_id = id(text_data)
                if text_id in used_texts:
                    continue

                text_bbox = text_data['global_bbox']

                # Check if text center is inside object bbox
                text_center_x = text_bbox['left'] + text_bbox['width'] / 2
                text_center_y = text_bbox['top'] + text_bbox['height'] / 2

                if (obj_bbox['left'] <= text_center_x <= obj_bbox['right'] and
                        obj_bbox['top'] <= text_center_y <= obj_bbox['bottom']):

                    # Calculate distance from text center to object center
                    distance_to_center = self.calculate_distance(
                        (obj_center_x, obj_center_y),
                        (text_center_x, text_center_y)
                    )

                    texts_inside.append({
                        'text_data': text_data,
                        'distance_to_center': distance_to_center,
                        'y_position': text_center_y
                    })

            if len(texts_inside) >= 2:
                # Sort by distance to center (closest first)
                texts_inside.sort(key=lambda t: t['distance_to_center'])

                # Take the 2 closest texts to center
                closest_two = texts_inside[:2]

                # Sort these 2 by vertical position (top to bottom)
                closest_two.sort(key=lambda t: t['y_position'])

                top_text = closest_two[0]['text_data']
                bottom_text = closest_two[1]['text_data']

                # Create match with both labels.
                # distance is 0 by convention: both texts sit inside the box.
                match = {
                    'object': obj,
                    'object_name': obj_name,
                    'object_bbox': obj_bbox,
                    'object_confidence': obj['Confidence'],
                    'text': f"{top_text['text']} / {bottom_text['text']}",
                    'text_top': top_text['text'],
                    'text_bottom': bottom_text['text'],
                    'text_bbox_top': top_text['global_bbox'],
                    'text_bbox_bottom': bottom_text['global_bbox'],
                    'text_confidence_top': top_text['confidence'],
                    'text_confidence_bottom': bottom_text['confidence'],
                    'distance': 0,
                    'distance_pixels': 0,
                    'match_type': 'two_labels',
                    'texts_found_inside': len(texts_inside)
                }

                all_matches.append(match)

                # Mark texts as used
                used_texts.add(id(top_text))
                used_texts.add(id(bottom_text))

                print(f"\n  ✓ Match: {obj_name} -> '{top_text['text']}' (top) / '{bottom_text['text']}' (bottom)")
                print(f"    Object confidence: {obj['Confidence']:.2f}%")
                print(f"    Top text confidence: {top_text['confidence']:.2f}%")
                print(f"    Bottom text confidence: {bottom_text['confidence']:.2f}%")
                print(f"    Top text distance to center: {closest_two[0]['distance_to_center']:.2f}px")
                print(f"    Bottom text distance to center: {closest_two[1]['distance_to_center']:.2f}px")
                if len(texts_inside) > 2:
                    print(f"    Note: {len(texts_inside)} texts found inside, used 2 closest to center")

            elif len(texts_inside) == 1:
                # Only found 1 text, but expected 2: the object is reported
                # unmatched and the lone text is NOT consumed, so it remains
                # available to Part 3 / reported in Part 4.
                print(f"\n  ⚠ Partial match: {obj_name} - Found only 1 text inside (expected 2)")
                print(f"    Text: '{texts_inside[0]['text_data']['text']}'")
                all_unmatched_objects.append(obj)

            else:
                # No text inside
                all_unmatched_objects.append(obj)
                print(f"\n  ✗ No match: {obj_name} - No text found inside bounding box (expected 2)")

    # Part 3: Match other objects to single text inside their bounding boxes
    if single_label_objects:
        print(f"\n{'='*80}")
        print(f"PART 3: Matching objects to single internal text")
        print(f"{'='*80}")

        for obj in single_label_objects:
            obj_bbox = obj['global_bbox']
            obj_name = obj['Name']

            # Calculate object center
            obj_center_x = obj_bbox['left'] + obj_bbox['width'] / 2
            obj_center_y = obj_bbox['top'] + obj_bbox['height'] / 2

            # Find all text inside this object's bounding box
            texts_inside = []
            for text_data in other_texts:
                text_id = id(text_data)
                if text_id in used_texts:
                    continue

                text_bbox = text_data['global_bbox']

                # Check if text center is inside object bbox
                text_center_x = text_bbox['left'] + text_bbox['width'] / 2
                text_center_y = text_bbox['top'] + text_bbox['height'] / 2

                if (obj_bbox['left'] <= text_center_x <= obj_bbox['right'] and
                        obj_bbox['top'] <= text_center_y <= obj_bbox['bottom']):
                    texts_inside.append(text_data)

            if texts_inside:
                # Choose the text closest to object center
                closest_text = min(texts_inside, key=lambda t: self.calculate_distance(
                    (obj_center_x, obj_center_y),
                    (t['global_bbox']['left'] + t['global_bbox']['width'] / 2,
                     t['global_bbox']['top'] + t['global_bbox']['height'] / 2)
                ))

                # Calculate distance for reporting
                text_center_x = closest_text['global_bbox']['left'] + closest_text['global_bbox']['width'] / 2
                text_center_y = closest_text['global_bbox']['top'] + closest_text['global_bbox']['height'] / 2
                distance_to_center = self.calculate_distance(
                    (obj_center_x, obj_center_y),
                    (text_center_x, text_center_y)
                )

                # Create match
                match = {
                    'object': obj,
                    'object_name': obj_name,
                    'object_bbox': obj_bbox,
                    'object_confidence': obj['Confidence'],
                    'text': closest_text['text'],
                    'text_bbox': closest_text['global_bbox'],
                    'text_confidence': closest_text['confidence'],
                    'distance': distance_to_center,
                    'distance_pixels': distance_to_center,
                    'match_type': 'single_label',
                    'texts_found_inside': len(texts_inside)
                }

                all_matches.append(match)

                # Mark text as used
                used_texts.add(id(closest_text))

                print(f"\n  ✓ Match: {obj_name} -> '{closest_text['text']}' (internal)")
                print(f"    Object confidence: {obj['Confidence']:.2f}%")
                print(f"    Text confidence: {closest_text['confidence']:.2f}%")
                print(f"    Distance to center: {distance_to_center:.2f}px")
                if len(texts_inside) > 1:
                    print(f"    Note: {len(texts_inside)} texts found inside, chose closest to center")
            else:
                # No text inside
                all_unmatched_objects.append(obj)
                print(f"\n  ✗ No match: {obj_name} - No text found inside bounding box")

    # Part 4: Report remaining unmatched texts (those not used)
    for text_data in other_texts:
        if id(text_data) not in used_texts:
            all_unmatched_texts.append(text_data)

    # Summary
    print(f"\n{'='*80}")
    print(f"MATCHING SUMMARY")
    print(f"{'='*80}")
    print(f"Total matches: {len(all_matches)}")
    print(f"  - VM-#### label matches: {sum(1 for m in all_matches if m['match_type'] == 'vm_label')}")
    print(f"  - Two-label matches: {sum(1 for m in all_matches if m['match_type'] == 'two_labels')}")
    print(f"  - Single-label matches: {sum(1 for m in all_matches if m['match_type'] == 'single_label')}")
    print(f"Unmatched objects: {len(all_unmatched_objects)}")
    print(f"Unmatched texts: {len(all_unmatched_texts)}")

    if all_unmatched_objects:
        print(f"\nUnmatched objects:")
        for obj in all_unmatched_objects:
            print(f"  - {obj['Name']} (confidence: {obj['Confidence']:.2f}%)")

    if all_unmatched_texts:
        print(f"\nUnmatched texts:")
        for text_data in all_unmatched_texts:
            print(f"  - '{text_data['text']}' (confidence: {text_data['confidence']:.2f}%)")

    return {
        'matches': all_matches,
        'unmatched_objects': all_unmatched_objects,
        'unmatched_texts': all_unmatched_texts,
        'n_objects': len(objects),
        'n_texts': len(all_text_detections),
        # Fraction of input objects that found a text label (0 when empty).
        'matching_rate': len(all_matches) / len(objects) if objects else 0
    }
|
def deduplicate_text_detections(self, all_text_detections, iou_threshold=0.5):
|
|
"""
|
|
Remove duplicate text detections across overlapping segments
|
|
|
|
Args:
|
|
all_text_detections: List of text detection dicts with global coordinates
|
|
iou_threshold: IoU threshold for considering text as duplicates
|
|
|
|
Returns:
|
|
List of deduplicated text detections
|
|
"""
|
|
if not all_text_detections:
|
|
return []
|
|
|
|
print(f"\n[TEXT DEDUPLICATION] Processing {len(all_text_detections)} text detections...")
|
|
|
|
# Sort by confidence (highest first)
|
|
all_text_detections = sorted(all_text_detections, key=lambda x: x['confidence'], reverse=True)
|
|
|
|
deduplicated = []
|
|
used = set()
|
|
|
|
for i, text_det in enumerate(all_text_detections):
|
|
if i in used:
|
|
continue
|
|
|
|
# Start a new group
|
|
group = [text_det]
|
|
used.add(i)
|
|
|
|
# Find overlapping text with same content
|
|
for j, other_det in enumerate(all_text_detections):
|
|
if j in used or j == i:
|
|
continue
|
|
|
|
# Check if text is the same (case-insensitive)
|
|
if text_det['text'].lower() == other_det['text'].lower():
|
|
iou = self.calculate_iou(text_det['global_bbox'], other_det['global_bbox'])
|
|
|
|
if iou > iou_threshold:
|
|
group.append(other_det)
|
|
used.add(j)
|
|
|
|
# Take the one with highest confidence (already sorted)
|
|
if len(group) > 1:
|
|
print(f" Merged {len(group)} duplicates of '{text_det['text']}'")
|
|
|
|
deduplicated.append(text_det)
|
|
|
|
print(f"[TEXT DEDUPLICATION] Complete: {len(all_text_detections)} -> {len(deduplicated)} text detections")
|
|
|
|
return deduplicated
|
|
|
|
def process_single_diagram(self, diagram_path, output_base_dir,
|
|
grid_size=(5, 5), overlap_percent=10,
|
|
keep_regex_list=None, min_confidence=80,
|
|
custom_labels_confidence=80, iou_threshold=0.3,
|
|
matching_max_distance=200):
|
|
"""
|
|
Complete pipeline: segment, detect text, clean, recognize objects, and match to labels
|
|
|
|
Args:
|
|
diagram_path: Path to input diagram
|
|
output_base_dir: Base directory for all outputs
|
|
grid_size: Tuple (rows, cols) for segmentation
|
|
overlap_percent: Overlap percentage for segments
|
|
keep_regex_list: Regex patterns for text to keep
|
|
min_confidence: Minimum confidence for text removal
|
|
custom_labels_confidence: Minimum confidence for object detection
|
|
iou_threshold: IoU threshold for deduplication (0.3 = 30% overlap)
|
|
matching_max_distance: Maximum distance for matching objects to text (pixels)
|
|
|
|
Returns:
|
|
Dictionary with complete results including matches
|
|
"""
|
|
# Create output directories
|
|
segments_dir = os.path.join(output_base_dir, 'segments')
|
|
text_json_dir = os.path.join(output_base_dir, 'text_detections')
|
|
cleaned_dir = os.path.join(output_base_dir, 'cleaned_segments')
|
|
detections_dir = os.path.join(output_base_dir, 'object_detections')
|
|
|
|
for dir_path in [segments_dir, text_json_dir, cleaned_dir, detections_dir]:
|
|
os.makedirs(dir_path, exist_ok=True)
|
|
|
|
print("="*80)
|
|
print("DIAGRAM PROCESSING PIPELINE")
|
|
print("="*80)
|
|
|
|
# Step 1: Segment the diagram
|
|
print("\n[STEP 1] Segmenting diagram...")
|
|
segments = self.segment_image(diagram_path, segments_dir, grid_size, overlap_percent)
|
|
|
|
# Get original image dimensions
|
|
original_img = Image.open(diagram_path)
|
|
img_width, img_height = original_img.size
|
|
|
|
# Step 2-4: Process each segment
|
|
all_results = []
|
|
all_global_detections = []
|
|
all_text_detections = []
|
|
|
|
for idx, (segment_path, position_info) in enumerate(segments):
|
|
segment_name = os.path.basename(segment_path)
|
|
base_name = os.path.splitext(segment_name)[0]
|
|
|
|
print(f"\n{'='*80}")
|
|
print(f"Processing segment {idx+1}/{len(segments)}: {segment_name}")
|
|
print(f"{'='*80}")
|
|
|
|
# Step 2: Detect text
|
|
print("\n[STEP 2] Detecting text with Textract...")
|
|
textract_data = self.detect_text_segment(segment_path)
|
|
|
|
# Save text detection JSON
|
|
json_path = os.path.join(text_json_dir, f"{base_name}.json")
|
|
with open(json_path, 'w') as f:
|
|
json.dump(textract_data, f, indent=2)
|
|
|
|
word_count = sum(1 for b in textract_data['Blocks'] if b['BlockType'] == 'WORD')
|
|
print(f" Detected {word_count} words")
|
|
|
|
# Extract text with global coordinates
|
|
for block in textract_data['Blocks']:
|
|
if block['BlockType'] == 'WORD':
|
|
bbox = block['Geometry']['BoundingBox']
|
|
|
|
# Convert to global coordinates
|
|
seg_left = position_info['left']
|
|
seg_top = position_info['top']
|
|
seg_width = position_info['width']
|
|
seg_height = position_info['height']
|
|
|
|
global_left = seg_left + int(bbox['Left'] * seg_width)
|
|
global_top = seg_top + int(bbox['Top'] * seg_height)
|
|
global_width = int(bbox['Width'] * seg_width)
|
|
global_height = int(bbox['Height'] * seg_height)
|
|
|
|
all_text_detections.append({
|
|
'text': block['Text'],
|
|
'confidence': block['Confidence'],
|
|
'segment_name': segment_name,
|
|
'global_bbox': {
|
|
'left': global_left,
|
|
'top': global_top,
|
|
'right': global_left + global_width,
|
|
'bottom': global_top + global_height,
|
|
'width': global_width,
|
|
'height': global_height
|
|
}
|
|
})
|
|
|
|
# Step 3: Clean text from segment
|
|
print("\n[STEP 3] Cleaning text from segment...")
|
|
cleaned_path = os.path.join(cleaned_dir, segment_name)
|
|
clean_stats = self.clean_text_from_segment(
|
|
segment_path, textract_data, cleaned_path,
|
|
keep_regex_list=keep_regex_list, min_confidence=min_confidence
|
|
)
|
|
print(f" Removed: {clean_stats['words_removed']} words")
|
|
print(f" Kept: {clean_stats['words_kept']} words")
|
|
|
|
# Step 4: Recognize objects with Custom Labels
|
|
print("\n[STEP 4] Recognizing objects with Custom Labels...")
|
|
detection_results = self.recognize_objects_segment(
|
|
cleaned_path, min_confidence=custom_labels_confidence
|
|
)
|
|
|
|
# Save detection results JSON
|
|
detection_json_path = os.path.join(detections_dir, f"{base_name}_detections.json")
|
|
with open(detection_json_path, 'w') as f:
|
|
json.dump(detection_results, f, indent=2)
|
|
|
|
if detection_results['success']:
|
|
labels = detection_results['custom_labels']
|
|
print(f" Detected {len(labels)} objects:")
|
|
|
|
# Convert to global coordinates and store
|
|
for label in labels:
|
|
print(f" - {label['Name']}: {label['Confidence']:.2f}%")
|
|
|
|
if 'Geometry' in label and 'BoundingBox' in label['Geometry']:
|
|
bbox = label['Geometry']['BoundingBox']
|
|
|
|
# Convert segment-local to global coordinates
|
|
seg_left = position_info['left']
|
|
seg_top = position_info['top']
|
|
seg_width = position_info['width']
|
|
seg_height = position_info['height']
|
|
|
|
global_left = seg_left + int(bbox['Left'] * seg_width)
|
|
global_top = seg_top + int(bbox['Top'] * seg_height)
|
|
global_width = int(bbox['Width'] * seg_width)
|
|
global_height = int(bbox['Height'] * seg_height)
|
|
|
|
global_detection = {
|
|
'Name': label['Name'],
|
|
'Confidence': label['Confidence'],
|
|
'segment_name': segment_name,
|
|
'global_bbox': {
|
|
'left': global_left,
|
|
'top': global_top,
|
|
'right': global_left + global_width,
|
|
'bottom': global_top + global_height,
|
|
'width': global_width,
|
|
'height': global_height
|
|
}
|
|
}
|
|
|
|
all_global_detections.append(global_detection)
|
|
else:
|
|
print(f" Error: {detection_results.get('error', 'Unknown error')}")
|
|
|
|
# Store results
|
|
segment_result = {
|
|
'segment_name': segment_name,
|
|
'segment_path': segment_path,
|
|
'position': position_info,
|
|
'cleaned_path': cleaned_path,
|
|
'text_detection': {
|
|
'total_words': word_count,
|
|
'words_removed': clean_stats['words_removed'],
|
|
'words_kept': clean_stats['words_kept']
|
|
},
|
|
'object_detection': detection_results
|
|
}
|
|
all_results.append(segment_result)
|
|
|
|
# Step 5: Deduplicate detections
|
|
print("\n" + "="*80)
|
|
print("[STEP 5] Deduplicating detections across segments")
|
|
print("="*80)
|
|
|
|
deduplicated_detections = self.deduplicate_detections(
|
|
all_global_detections,
|
|
iou_threshold=iou_threshold
|
|
)
|
|
|
|
print("\n[STEP 5b] Deduplicating text detections")
|
|
deduplicated_text = self.deduplicate_text_detections(
|
|
all_text_detections,
|
|
iou_threshold=0.5
|
|
)
|
|
|
|
# Step 6: Match objects to text based on object type
|
|
print("\n" + "="*80)
|
|
print("[STEP 6] Matching objects to text (by type)")
|
|
print("="*80)
|
|
|
|
matching_results = self.match_objects_to_text_by_type(
|
|
objects=deduplicated_detections,
|
|
all_text_detections=deduplicated_text,
|
|
max_distance=matching_max_distance,
|
|
img_width=img_width,
|
|
img_height=img_height
|
|
)
|
|
|
|
# Generate summary
|
|
print("\n" + "="*80)
|
|
print("PROCESSING COMPLETE - SUMMARY")
|
|
print("="*80)
|
|
|
|
total_objects_raw = len(all_global_detections)
|
|
total_objects_deduplicated = len(deduplicated_detections)
|
|
total_words_detected = sum(r['text_detection']['total_words']
|
|
for r in all_results)
|
|
total_words_removed = sum(r['text_detection']['words_removed']
|
|
for r in all_results)
|
|
|
|
print(f"\nSegments processed: {len(segments)}")
|
|
print(f"Total words detected (raw): {len(all_text_detections)}")
|
|
print(f"Total words after deduplication: {len(deduplicated_text)}")
|
|
print(f"Total words removed: {total_words_removed}")
|
|
print(f"Total objects detected (raw): {total_objects_raw}")
|
|
print(f"Total objects after deduplication: {total_objects_deduplicated}")
|
|
print(f"Total VM-#### labels found: {matching_results.get('n_texts', 0)}")
|
|
print(f"Successful matches: {len(matching_results.get('matches', []))}")
|
|
print(f"Matching rate: {matching_results.get('matching_rate', 0)*100:.1f}%")
|
|
|
|
# Save complete results
|
|
summary_path = os.path.join(output_base_dir, 'processing_summary.json')
|
|
summary = {
|
|
'input_diagram': diagram_path,
|
|
'image_dimensions': {'width': img_width, 'height': img_height},
|
|
'grid_size': grid_size,
|
|
'overlap_percent': overlap_percent,
|
|
'iou_threshold': iou_threshold,
|
|
'matching_max_distance': matching_max_distance,
|
|
'total_segments': len(segments),
|
|
'total_words_detected': total_words_detected,
|
|
'total_words_removed': total_words_removed,
|
|
'total_objects_raw': total_objects_raw,
|
|
'total_objects_deduplicated': total_objects_deduplicated,
|
|
'total_vm_labels': matching_results['n_texts'],
|
|
'total_matches': len(matching_results['matches']),
|
|
'matching_rate': matching_results['matching_rate'],
|
|
'segments': all_results,
|
|
'deduplicated_detections': deduplicated_detections,
|
|
'matching_results': matching_results
|
|
}
|
|
|
|
with open(summary_path, 'w') as f:
|
|
json.dump(summary, f, indent=2)
|
|
|
|
# Save matched pairs to separate file
|
|
matches_path = os.path.join(output_base_dir, 'object_label_matches.json')
|
|
with open(matches_path, 'w') as f:
|
|
json.dump(matching_results, f, indent=2)
|
|
|
|
# Create human-readable matches report
|
|
report_path = os.path.join(output_base_dir, 'matches_report.txt')
|
|
with open(report_path, 'w') as f:
|
|
f.write("="*80 + "\n")
|
|
f.write("OBJECT-TO-LABEL MATCHING REPORT\n")
|
|
f.write("="*80 + "\n\n")
|
|
|
|
f.write(f"Total Objects: {matching_results.get('n_objects', 0)}\n")
|
|
f.write(f"Total Text Labels: {matching_results.get('n_texts', 0)}\n")
|
|
f.write(f"Successful Matches: {len(matching_results.get('matches', []))}\n")
|
|
f.write(f"Matching Rate: {matching_results.get('matching_rate', 0)*100:.1f}%\n\n")
|
|
|
|
f.write("="*80 + "\n")
|
|
f.write("MATCHED PAIRS\n")
|
|
f.write("="*80 + "\n\n")
|
|
|
|
for i, match in enumerate(matching_results.get('matches', []), 1):
|
|
match_type = match.get('match_type', 'vm_label')
|
|
|
|
f.write(f"{i}. {match['object_name']} -> {match['text']}\n")
|
|
f.write(f" Match Type: {match_type}\n")
|
|
f.write(f" Object Confidence: {match['object_confidence']:.2f}%\n")
|
|
|
|
if match_type == 'two_labels':
|
|
f.write(f" Top Text: {match['text_top']}\n")
|
|
f.write(f" Top Text Confidence: {match['text_confidence_top']:.2f}%\n")
|
|
f.write(f" Bottom Text: {match['text_bottom']}\n")
|
|
f.write(f" Bottom Text Confidence: {match['text_confidence_bottom']:.2f}%\n")
|
|
else:
|
|
f.write(f" Text Confidence: {match['text_confidence']:.2f}%\n")
|
|
f.write(f" Distance: {match['distance']:.2f} pixels\n")
|
|
|
|
f.write("\n")
|
|
|
|
if matching_results.get('unmatched_objects'):
|
|
f.write("="*80 + "\n")
|
|
f.write("UNMATCHED OBJECTS\n")
|
|
f.write("="*80 + "\n\n")
|
|
|
|
for obj in matching_results['unmatched_objects']:
|
|
f.write(f"- {obj['Name']} (Confidence: {obj['Confidence']:.2f}%)\n\n")
|
|
|
|
if matching_results.get('unmatched_texts'):
|
|
f.write("="*80 + "\n")
|
|
f.write("UNMATCHED TEXT LABELS\n")
|
|
f.write("="*80 + "\n\n")
|
|
|
|
for text_data in matching_results['unmatched_texts']:
|
|
f.write(f"- {text_data['text']} (Confidence: {text_data['confidence']:.2f}%)\n\n")
|
|
|
|
print(f"\nResults saved to: {output_base_dir}")
|
|
print(f"Summary: {summary_path}")
|
|
print(f"Matches JSON: {matches_path}")
|
|
print(f"Matches Report: {report_path}")
|
|
|
|
return summary
|
|
|
|
def visualize_detections(self, summary_data, output_path, show_duplicates=False):
|
|
"""Create visualization of detections"""
|
|
diagram_path = summary_data['input_diagram']
|
|
img = Image.open(diagram_path)
|
|
draw = ImageDraw.Draw(img)
|
|
|
|
if not show_duplicates:
|
|
deduplicated = summary_data.get('deduplicated_detections', [])
|
|
for detection in deduplicated:
|
|
bbox = detection['global_bbox']
|
|
draw.rectangle(
|
|
[(bbox['left'], bbox['top']), (bbox['right'], bbox['bottom'])],
|
|
outline='green',
|
|
width=3
|
|
)
|
|
label = f"{detection['Name']} ({detection['Confidence']:.1f}%)"
|
|
draw.text((bbox['left'], bbox['top'] - 15), label, fill='green')
|
|
|
|
img.save(output_path)
|
|
print(f"Visualization saved to: {output_path}")
|
|
|
|
def visualize_text_detections(self, summary_data, output_path, show_duplicates=False):
|
|
"""Create visualization of text detections"""
|
|
diagram_path = summary_data['input_diagram']
|
|
img = Image.open(diagram_path)
|
|
img.save(output_path)
|
|
print(f"Text visualization saved to: {output_path}")
|
|
|
|
def visualize_matches(self, summary_data, output_path):
|
|
"""Create visualization of matches"""
|
|
diagram_path = summary_data['input_diagram']
|
|
img = Image.open(diagram_path)
|
|
draw = ImageDraw.Draw(img)
|
|
|
|
matching_results = summary_data.get('matching_results', {})
|
|
matches = matching_results.get('matches', [])
|
|
|
|
for match in matches:
|
|
obj_bbox = match['object_bbox']
|
|
match_type = match.get('match_type', 'vm_label')
|
|
|
|
color = 'blue' if match_type == 'vm_label' else 'green'
|
|
|
|
draw.rectangle(
|
|
[(obj_bbox['left'], obj_bbox['top']),
|
|
(obj_bbox['right'], obj_bbox['bottom'])],
|
|
outline=color,
|
|
width=3
|
|
)
|
|
|
|
img.save(output_path)
|
|
print(f"Match visualization saved to: {output_path}")
|