Source code for meipi.indexing.embedding
"""Helpers for batching images and generating image embeddings.
This module provides small, model-agnostic utilities used by indexing jobs:
- pre-processing image inputs into model-compatible batches
- running batched forward passes on a selected device
- collecting pooled vectors as ``numpy.ndarray``
"""
from typing import List
from itertools import batched
import gc
from tqdm.auto import tqdm
import numpy as np
from torch.utils.data import default_collate
import torch
from transformers import AutoImageProcessor, BatchFeature, BaseImageProcessor
from transformers.image_utils import ImageInput
[Doku]
def check_cuda_memory() -> None:
    """Print every CUDA tensor the garbage collector currently tracks.

    Debugging aid for memory usage: walks ``gc.get_objects()`` and, for each
    ``torch.Tensor`` whose device type is ``"cuda"``, prints its Python type,
    its size and its device. Nothing is printed when no such tensor exists.
    """
    for candidate in gc.get_objects():
        # Skip anything that is not a tensor before touching ``.device``.
        if not isinstance(candidate, torch.Tensor):
            continue
        if candidate.device.type != "cuda":
            continue
        print(type(candidate), candidate.size(), candidate.device)
[Doku]
def create_image_batches(
    images: ImageInput, model_name: str, batch_size: int
) -> List[BatchFeature]:
    """Pre-process images and split them into model-ready batches.

    The image processor is resolved with
    ``AutoImageProcessor.from_pretrained(model_name, use_fast=True)`` and is
    applied to all ``images`` in a single call. The resulting
    ``pixel_values`` are then chunked with ``itertools.batched`` and each
    chunk is collated with ``torch.utils.data.default_collate``.

    Args:
        images: Raw image inputs accepted by ``transformers``.
        model_name: HuggingFace model id used to resolve the image processor.
        batch_size: Maximum number of samples per batch; the final batch may
            hold fewer samples.

    Returns:
        A list of ``BatchFeature`` objects, each holding one collated
        ``pixel_values`` entry.
    """
    processor: BaseImageProcessor = AutoImageProcessor.from_pretrained(
        model_name, use_fast=True
    )
    pixel_values = processor(images)["pixel_values"]

    feature_batches: List[BatchFeature] = []
    for chunk in batched(pixel_values, batch_size):
        # ``batched`` yields tuples; ``default_collate`` is handed a list here.
        collated = default_collate(list(chunk))
        feature_batches.append(BatchFeature(data={"pixel_values": collated}))
    return feature_batches
@torch.no_grad()
def generate_image_embeddings(
    model, inp_batches: List[BatchFeature], device="cuda"
) -> np.ndarray:
    """Generate pooled embeddings for all batches and return one stacked array.

    The model is moved to ``device`` for inference and is moved back to the
    device it was on before the call, even when inference raises. Each batch
    is likewise moved to ``device`` for its forward pass and back to ``"cpu"``
    afterwards, so the caller's batches do not stay on the inference device.

    Args:
        model: Model exposing ``.device`` and ``.to()``; calling it must
            return an object with a ``pooler_output`` tensor.
        inp_batches: Batches whose entries are unpacked as keyword arguments
            of the model call. Each batch is moved between devices during the
            call (the result of ``batch.to`` is discarded, so this relies on
            the move happening in place).
        device: Device used for the forward passes.

    Returns:
        The per-batch pooled outputs stacked row-wise with ``numpy.vstack``.

    Raises:
        ValueError: If ``inp_batches`` is empty (``numpy.vstack`` needs at
            least one array).
    """
    original_device = model.device
    embeddings = []
    try:
        model.to(device)
        for batch in tqdm(inp_batches):
            batch.to(device)
            try:
                out = model(
                    **batch, output_hidden_states=True, output_attentions=False
                )
                # ``squeeze`` drops every size-1 axis; a batch that collapses
                # to 1-D is still treated as a single row by ``np.vstack``.
                vector = out.pooler_output.detach().cpu().numpy().squeeze()
                # Drop the output reference so ``empty_cache`` can free it.
                del out
            finally:
                # Always hand the batch back on the CPU, even if the forward
                # pass failed, so the caller's list holds no device tensors.
                batch.to("cpu", non_blocking=True)
            torch.cuda.empty_cache()
            embeddings.append(vector)
    finally:
        # Restore the model's original placement on both the success and the
        # error path (e.g. an out-of-memory error in the middle of the loop).
        model.to(original_device)
    return np.vstack(embeddings)