To support more fine-grained data analysis, it is sometimes useful to extract a smaller crop of a larger document. Redshred provides a feature for this called the “Segment Cropper”. For any segment that has been created, you can retrieve the exact image crop that was used to generate the segment.
The Segment Cropper endpoint is:
https://api.staging.redshred.com/v2/collections/{collection_name}/services/crop_by_link?segment_link={segment_link}
Sample API Call to invoke the endpoint:
# Connection settings used to build the request below
export REDSHRED_HOST="https://api.staging.redshred.com"
export COLLECTION="tracee"
export REDSHRED_TOKEN="<token>"
# Full URL of the segment whose image crop should be retrieved
export SEGMENT_LINK="https://api.staging.redshred.com/v2/collections/tracee/perspectives/L7w269YfgcJAKaF3xpbaV2/segments/HYSiSRBdFAsugxh5aWEeik"
# Invoke the endpoint and write the response to crop.png.
# --fail makes curl exit non-zero on an HTTP error instead of saving the
# error response body as crop.png.
curl --fail -X GET "$REDSHRED_HOST/v2/collections/$COLLECTION/services/crop_by_link?segment_link=$SEGMENT_LINK" -H "Authorization: Token $REDSHRED_TOKEN" --output "crop.png"
from redshred import RedshredClient
from io import BytesIO
from PIL import Image

# Connect to the API and select the collection to query.
rs = RedshredClient(token="<token>", host="https://api.staging.redshred.com")
collection = rs.collection("tracee")

# Find every formula segment produced by the "docling" perspective.
formula_segments = collection.q('segment_type = "formula" and perspective.name = "docling"')

for i, eq in enumerate(formula_segments):
    # Look up the text of the page the segment is on. This is not needed for
    # the crop itself; page_text is not used further in this sample.
    _, page_index = eq.regions.get_offsets()
    doc = eq.document()
    page_text = doc.page(page_index).text

    # Fetch the segment's image crop and save it as crop_<i>.png.
    img_bytes = rs.api.get(eq.image_link).content
    image = Image.open(BytesIO(img_bytes))
    image.save(f"crop_{i}.png")
import (
"bytes"
"fmt"
"io"
"os"
"github.com/redshred/redshred-client-go/pkg/redshred"
)
// DownloadAndSaveImage downloads the image at imageURL using client and writes
// the bytes to a file at outputPath.
//
// The file is created with os.Create, so an existing file at outputPath is
// truncated. A non-nil error is returned if the download fails, the file
// cannot be created, the write fails, or the file cannot be closed.
func DownloadAndSaveImage(client *redshred.RedShredClient, imageURL, outputPath string) error {
	// DownloadImage already prefixes its error with "downloading image", so
	// the error is returned as-is rather than wrapped with the same text again.
	data, err := DownloadImage(client, imageURL)
	if err != nil {
		return err
	}

	// Create output file
	out, err := os.Create(outputPath)
	if err != nil {
		return fmt.Errorf("creating file %s: %w", outputPath, err)
	}

	// Save the image
	if _, err := io.Copy(out, bytes.NewReader(data)); err != nil {
		// Best-effort close; the copy error is the one worth reporting.
		out.Close()
		return fmt.Errorf("saving image to %s: %w", outputPath, err)
	}

	// Close explicitly rather than via defer so that a failure while closing
	// the freshly written file is reported to the caller instead of dropped.
	if err := out.Close(); err != nil {
		return fmt.Errorf("closing file %s: %w", outputPath, err)
	}
	return nil
}
// DownloadImage calls client.JSONRequest with the GET method for imageURL and
// returns the bytes it yields. A request error is wrapped with a
// "downloading image" prefix and returned with a nil slice.
func DownloadImage(client *redshred.RedShredClient, imageURL string) ([]byte, error) {
	const method = "GET"

	body, reqErr := client.JSONRequest(method, imageURL, nil)
	if reqErr != nil {
		return nil, fmt.Errorf("downloading image: %w", reqErr)
	}

	return body, nil
}
// main queries the "tracee" collection for formula segments from the
// "docling" perspective and saves the image crop of each returned segment to
// crop_<i>.png in the current working directory.
//
// The API token is read from the REDSHRED_TOKEN environment variable. Errors
// are written to standard error, and the process exits with status 1 if the
// collection lookup, the query, or any individual download fails.
func main() {
	// Initialize RedShred client
	// NOTE(review): BaseUrl is never set here but is read below when building
	// the crop URL; presumably the client supplies a default — confirm.
	rs := redshred.RedShredClient{
		Token: os.Getenv("REDSHRED_TOKEN"),
	}

	// Get the collection
	coll, err := rs.Collection("tracee")
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error getting collection: %v\n", err)
		os.Exit(1)
	}

	// Query for formula segments
	// NOTE(review): Size: 1 presumably limits the query to a single result,
	// so the loop below would run at most once — confirm and raise if needed.
	segments, err := coll.Query(redshred.RQLParams{
		Q:      `segment_type = "formula" and perspective.name = "docling"`,
		Fields: []string{"id", "text", "self_link"},
		Size:   1,
	})
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error querying segments: %v\n", err)
		os.Exit(1)
	}

	// Process each segment, remembering whether any download failed so the
	// exit status reflects it.
	failed := false
	for i, segment := range segments {
		// Construct the image URL
		// NOTE(review): the segment link is inserted into the query string
		// without URL-escaping; verify the links never contain '&', '#' or
		// similar characters.
		imageURL := fmt.Sprintf("%s/v2/collections/%s/services/crop_by_link?segment_link=%s",
			rs.BaseUrl, coll.Name, segment.SelfLink)

		// Generate output filename
		outputFile := fmt.Sprintf("crop_%d.png", i)

		// Download and save the image; keep going on failure so one bad
		// segment does not prevent the remaining crops from being saved.
		if err := DownloadAndSaveImage(&rs, imageURL, outputFile); err != nil {
			fmt.Fprintf(os.Stderr, "Error: %v\n", err)
			failed = true
			continue
		}
		fmt.Printf("Successfully saved image to %s\n", outputFile)
	}

	if failed {
		os.Exit(1)
	}
}