Segment Image Crops

To aid in finer-grained data analysis, it is sometimes useful to get a smaller crop of the larger document. Redshred has a feature for this called the “Segment Cropper”. For any segment that has been created, you can retrieve the exact image crop that was used to generate the segment.

Query Endpoints

The Segment Cropper endpoint is:

https://api.staging.redshred.com/v2/collections/{collection_name}/services/crop_by_link?segment_link={segment_link}

Sample API Call to invoke the endpoint:

# Connection settings for the API call.
export REDSHRED_HOST="https://api.staging.redshred.com"
export COLLECTION="tracee"
export REDSHRED_TOKEN="<token>"

# Segment Link: the full API URL of the segment whose crop you want.
export SEGMENT_LINK="https://api.staging.redshred.com/v2/collections/tracee/perspectives/L7w269YfgcJAKaF3xpbaV2/segments/HYSiSRBdFAsugxh5aWEeik"

# Invoke the endpoint.
# -G with --data-urlencode sends segment_link as a URL-encoded query parameter
# of a GET request (the link is itself a URL, so it must be encoded), and
# --fail makes curl exit non-zero on an HTTP error instead of saving the
# error response body as crop.png.
curl --fail -G "$REDSHRED_HOST/v2/collections/$COLLECTION/services/crop_by_link" \
  --data-urlencode "segment_link=$SEGMENT_LINK" \
  -H "Authorization: Token $REDSHRED_TOKEN" \
  --output "crop.png"
from redshred import RedshredClient
from io import BytesIO
from PIL import Image

# Connect to the staging API; replace "<token>" with a real API token.
rs = RedshredClient(token="<token>", host="https://api.staging.redshred.com")
collection = rs.collection("tracee")

# Select the formula segments that belong to the "docling" perspective.
formula_segments = collection.q(
    'segment_type = "formula" and perspective.name = "docling"'
)

for index, segment in enumerate(formula_segments):
    # Look up the text of the page the segment sits on. The second value
    # returned by get_offsets() is used as the page index. page_text is not
    # used further in this sample.
    _, page_index = segment.regions.get_offsets()
    page_text = segment.document().page(page_index).text

    # Fetch the crop through the segment's image link and write it out as
    # crop_<index>.png in the current directory.
    crop_bytes = rs.api.get(segment.image_link).content
    Image.open(BytesIO(crop_bytes)).save(f"crop_{index}.png")
import (
	"bytes"
	"fmt"
	"io"
	"net/url"
	"os"

	"github.com/redshred/redshred-client-go/pkg/redshred"
)

// DownloadAndSaveImage fetches the image at imageURL through client and
// writes the returned bytes to outputPath.
//
// The download runs before the file is created, so a failed request does not
// touch outputPath. os.Create truncates any existing file at outputPath.
// Each error is wrapped with the step that failed. A failure to close the
// file is reported as well, because on a freshly written file a failed Close
// can mean the data never reached disk.
func DownloadAndSaveImage(client *redshred.RedShredClient, imageURL, outputPath string) error {
	// Fetch the image bytes.
	data, err := DownloadImage(client, imageURL)
	if err != nil {
		return fmt.Errorf("downloading image: %w", err)
	}

	// Create (or truncate) the output file.
	out, err := os.Create(outputPath)
	if err != nil {
		return fmt.Errorf("creating file %s: %w", outputPath, err)
	}

	// Write the image.
	if _, err := io.Copy(out, bytes.NewReader(data)); err != nil {
		// The copy error is the one worth reporting; closing here is
		// best-effort cleanup of the file handle.
		_ = out.Close()
		return fmt.Errorf("saving image to %s: %w", outputPath, err)
	}

	// Close explicitly rather than via defer so the close error is not lost.
	if err := out.Close(); err != nil {
		return fmt.Errorf("closing file %s: %w", outputPath, err)
	}

	return nil
}

// DownloadImage issues a GET for imageURL via client.JSONRequest and returns
// the bytes that call produces, unmodified. If the request fails, the error
// is returned wrapped and the byte slice is nil.
func DownloadImage(client *redshred.RedShredClient, imageURL string) ([]byte, error) {
	body, reqErr := client.JSONRequest("GET", imageURL, nil)
	if reqErr != nil {
		return nil, fmt.Errorf("downloading image: %w", reqErr)
	}

	return body, nil
}

// main queries the "tracee" collection for formula segments from the
// "docling" perspective and saves each segment's image crop as crop_<i>.png
// in the working directory.
//
// Setup failures (collection lookup, query) are reported on stderr and end
// the program with exit status 1. A failed download is reported on stderr
// and skipped; the program then exits with status 1 once all segments have
// been attempted.
func main() {
	// Initialize RedShred client; the API token is read from the environment.
	rs := redshred.RedShredClient{
		Token: os.Getenv("REDSHRED_TOKEN"),
	}

	// Get the collection
	coll, err := rs.Collection("tracee")
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error getting collection: %v\n", err)
		os.Exit(1)
	}

	// Query for formula segments.
	// NOTE(review): Size presumably caps the number of returned segments at
	// one — raise it to crop more than a single segment; confirm against the
	// client library.
	segments, err := coll.Query(redshred.RQLParams{
		Q:      `segment_type = "formula" and perspective.name = "docling"`,
		Fields: []string{"id", "text", "self_link"},
		Size:   1,
	})
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error querying segments: %v\n", err)
		os.Exit(1)
	}

	// Process each segment
	failed := false
	for i, segment := range segments {
		// Construct the image URL. The segment link is itself a URL, so it is
		// query-escaped to keep any reserved characters in it (such as '&',
		// '#' or '?') from corrupting the request; the collection name is
		// path-escaped for the same reason.
		imageURL := fmt.Sprintf("%s/v2/collections/%s/services/crop_by_link?segment_link=%s",
			rs.BaseUrl, url.PathEscape(coll.Name), url.QueryEscape(segment.SelfLink))

		// Generate output filename
		outputFile := fmt.Sprintf("crop_%d.png", i)

		// Download and save the image; keep going with the remaining
		// segments if one of them fails.
		if err := DownloadAndSaveImage(&rs, imageURL, outputFile); err != nil {
			fmt.Fprintf(os.Stderr, "Error: %v\n", err)
			failed = true
			continue
		}
		fmt.Printf("Successfully saved image to %s\n", outputFile)
	}

	// Surface partial failure to the caller (shell scripts, CI).
	if failed {
		os.Exit(1)
	}
}