VintaSoft Imaging .NET SDK 14.1: Документация для .NET разработчика
Vintasoft.Imaging.Text Namespace / TextSearchEngine Class
Члены типа Объект Синтаксис Пример Иерархия Требования Смотрите также
    Класс TextSearchEngine
    Представляет абстрактный базовый класс, определяющий алгоритм текстового поиска.
    Объектная модель
    TextSearchEngine
    Синтаксис
    'Declaration
    
    Public MustInherit Class TextSearchEngine
    
    
     
    Пример

    Ниже приведён пример кода на VB.NET и C#, который демонстрирует, как создать механизм текстового поиска для поиска цифр на странице PDF документа.

    ''' <summary>
    ''' Goes through every page of a PDF document and prints, for each text region
    ''' with digits found on the page, its text content and its rectangle.
    ''' </summary>
    ''' <param name="document">PDF document whose pages are scanned for digits.</param>
    Public Sub SearchDigitsInTextOfPdfDocumentUsingTextSearchEngine(document As Vintasoft.Imaging.Pdf.PdfDocument)
        System.Console.WriteLine("Searching the digits in text of PDF document.")

        Dim pageCount As Integer = document.Pages.Count
        For pageIndex As Integer = 0 To pageCount - 1
            Dim foundRegions As Vintasoft.Imaging.Text.TextRegion() = AdvancedDigitsSearchOnPdfPage(document.Pages(pageIndex))

            ' nothing to report for this page
            If foundRegions Is Nothing Then
                Continue For
            End If

            ' print one line per found text region
            For Each foundRegion As Vintasoft.Imaging.Text.TextRegion In foundRegions
                Dim message As String = String.Format("- Text={0}, Rectangle={1}", foundRegion.TextContent, foundRegion.Rectangle)
                System.Console.WriteLine(message)
            Next
        Next

        System.Console.WriteLine("Searching the digits in text of PDF document is finished.")
    End Sub
    
    ''' <summary>
    ''' Collects all text regions of a PDF page that are matched by the digits search engine.
    ''' </summary>
    ''' <param name="page">PDF page whose text is searched for digits.</param>
    ''' <returns>An array with the found text regions; the array is empty if nothing was found.</returns>
    Public Function AdvancedDigitsSearchOnPdfPage(page As Vintasoft.Imaging.Pdf.Tree.PdfPage) As Vintasoft.Imaging.Text.TextRegion()
        Dim foundRegions As New System.Collections.Generic.List(Of Vintasoft.Imaging.Text.TextRegion)()
        Dim engine As New DigitsSearchEngine()

        ' position in the page text from which the next search starts
        Dim searchPosition As Integer = 0
        Do
            ' search for the next match, left to right
            Dim match As Vintasoft.Imaging.Text.TextRegion = page.TextRegion.FindText(engine, searchPosition, False)

            ' no more matches on this page
            If match Is Nothing Then
                Exit Do
            End If

            foundRegions.Add(match)
            ' shift the search position past the text of the match just found
            searchPosition += match.TextContent.Length
        Loop

        Return foundRegions.ToArray()
    End Function
    
    ''' <summary>
    ''' Text search engine that locates a run of consecutive digits in a string.
    ''' </summary>
    Private Class DigitsSearchEngine
        Inherits Vintasoft.Imaging.Text.TextSearchEngine

        ''' <summary>
        ''' Finds a run of consecutive digits inside the analyzed range of the source string.
        ''' When searching left to right the run closest to the beginning of the range is returned;
        ''' when searching right to left the run closest to the end of the range is returned.
        ''' </summary>
        ''' <param name="sourceString">Source string (string of PDF page) where text must be searched.</param>
        ''' <param name="startIndex">The zero-based index, in the sourceString, from which text must be searched.</param>
        ''' <param name="length">The number of characters, in the sourceString, to analyze.</param>
        ''' <param name="rightToLeft">Indicates that text should be searched from right to left.</param>
        ''' <returns>
        ''' Vintasoft.Imaging.Text.TextSearchResult object created from the index of the first digit
        ''' of the run and the number of digits in the run, if digits are found; otherwise, null.
        ''' </returns>
        Public Overrides Function Find(sourceString As String, startIndex As Integer, length As Integer, rightToLeft As Boolean) As Vintasoft.Imaging.Text.TextSearchResult
            ' the analyzed range is [rangeStart, rangeEnd) for both search directions
            Dim rangeStart As Integer = startIndex
            Dim rangeEnd As Integer = startIndex + length

            ' index of the first digit of the run (inclusive), -1 while unknown
            Dim startDigitIndex As Integer = -1
            ' index just after the last digit of the run (exclusive), -1 while unknown
            Dim endDigitIndex As Integer = -1

            If rightToLeft Then
                ' scan backwards from the end of the range, but never below rangeStart,
                ' so that characters outside the analyzed range are not inspected
                For index As Integer = rangeEnd - 1 To rangeStart Step -1
                    Dim isDigit As Boolean = Char.IsDigit(sourceString(index))
                    If isDigit AndAlso endDigitIndex = -1 Then
                        ' the first digit seen from the right is the last digit of the run
                        endDigitIndex = index + 1
                    ElseIf Not isDigit AndAlso endDigitIndex <> -1 Then
                        ' the first non-digit after the run marks where the run begins
                        startDigitIndex = index + 1
                        Exit For
                    End If
                Next
                ' the run reaches the beginning of the analyzed range
                If endDigitIndex <> -1 AndAlso startDigitIndex = -1 Then
                    startDigitIndex = rangeStart
                End If
            Else
                ' scan forwards from the beginning of the range
                For index As Integer = rangeStart To rangeEnd - 1
                    Dim isDigit As Boolean = Char.IsDigit(sourceString(index))
                    If isDigit AndAlso startDigitIndex = -1 Then
                        ' the first digit seen from the left is the first digit of the run
                        startDigitIndex = index
                    ElseIf Not isDigit AndAlso startDigitIndex <> -1 Then
                        ' the first non-digit after the run marks where the run ends
                        endDigitIndex = index
                        Exit For
                    End If
                Next
                ' the run reaches the end of the analyzed range
                If startDigitIndex <> -1 AndAlso endDigitIndex = -1 Then
                    endDigitIndex = rangeEnd
                End If
            End If

            ' if digit is not found
            If startDigitIndex = -1 Then
                Return Nothing
            End If

            ' return the text search result: start of the run and its length
            Return New Vintasoft.Imaging.Text.TextSearchResult(startDigitIndex, endDigitIndex - startDigitIndex)
        End Function
    End Class
    
    
    /// <summary>
    /// Goes through every page of a PDF document and prints, for each text region
    /// with digits found on the page, its text content and its rectangle.
    /// </summary>
    /// <param name="document">PDF document whose pages are scanned for digits.</param>
    public void SearchDigitsInTextOfPdfDocumentUsingTextSearchEngine(Vintasoft.Imaging.Pdf.PdfDocument document)
    {
        System.Console.WriteLine("Searching the digits in text of PDF document.");

        for (int pageIndex = 0; pageIndex < document.Pages.Count; pageIndex++)
        {
            Vintasoft.Imaging.Text.TextRegion[] foundRegions =
                AdvancedDigitsSearchOnPdfPage(document.Pages[pageIndex]);

            // nothing to report for this page
            if (foundRegions == null)
            {
                continue;
            }

            // print one line per found text region
            foreach (Vintasoft.Imaging.Text.TextRegion foundRegion in foundRegions)
            {
                string message = string.Format("- Text={0}, Rectangle={1}",
                    foundRegion.TextContent,
                    foundRegion.Rectangle);
                System.Console.WriteLine(message);
            }
        }

        System.Console.WriteLine("Searching the digits in text of PDF document is finished.");
    }
    
    /// <summary>
    /// Collects all text regions of a PDF page that are matched by the digits search engine.
    /// </summary>
    /// <param name="page">PDF page whose text is searched for digits.</param>
    /// <returns>An array with the found text regions; the array is empty if nothing was found.</returns>
    public Vintasoft.Imaging.Text.TextRegion[] AdvancedDigitsSearchOnPdfPage(
        Vintasoft.Imaging.Pdf.Tree.PdfPage page)
    {
        System.Collections.Generic.List<Vintasoft.Imaging.Text.TextRegion> foundRegions =
            new System.Collections.Generic.List<Vintasoft.Imaging.Text.TextRegion>();
        DigitsSearchEngine engine = new DigitsSearchEngine();

        // position in the page text from which the next search starts;
        // it is passed by reference, so FindText may update it
        int searchPosition = 0;
        while (true)
        {
            // search for the next match, left to right
            Vintasoft.Imaging.Text.TextRegion match =
                page.TextRegion.FindText(engine, ref searchPosition, false);

            // no more matches on this page
            if (match == null)
            {
                break;
            }

            foundRegions.Add(match);
            // shift the search position past the text of the match just found
            searchPosition += match.TextContent.Length;
        }

        return foundRegions.ToArray();
    }
    
    /// <summary>
    /// Text search engine that locates a run of consecutive digits in a string.
    /// </summary>
    class DigitsSearchEngine : Vintasoft.Imaging.Text.TextSearchEngine
    {

        /// <summary>
        /// Finds a run of consecutive digits inside the analyzed range of the source string.
        /// When searching left to right the run closest to the beginning of the range is returned;
        /// when searching right to left the run closest to the end of the range is returned.
        /// </summary>
        /// <param name="sourceString">Source string (string of PDF page) where text must be searched.</param>
        /// <param name="startIndex">The zero-based index, in the sourceString, from which text must be searched.</param>
        /// <param name="length">The number of characters, in the sourceString, to analyze.</param>
        /// <param name="rightToLeft">Indicates that text should be searched from right to left.</param>
        /// <returns>
        /// Vintasoft.Imaging.Text.TextSearchResult object created from the index of the first digit
        /// of the run and the number of digits in the run, if digits are found; otherwise, null.
        /// </returns>
        public override Vintasoft.Imaging.Text.TextSearchResult Find(
            string sourceString, int startIndex, int length, bool rightToLeft)
        {
            // the analyzed range is [rangeStart, rangeEnd) for both search directions
            int rangeStart = startIndex;
            int rangeEnd = startIndex + length;

            // index of the first digit of the run (inclusive), -1 while unknown
            int startDigitIndex = -1;
            // index just after the last digit of the run (exclusive), -1 while unknown
            int endDigitIndex = -1;

            if (rightToLeft)
            {
                // scan backwards from the end of the range, but never below rangeStart,
                // so that characters outside the analyzed range are not inspected
                for (int index = rangeEnd - 1; index >= rangeStart; index--)
                {
                    bool isDigit = char.IsDigit(sourceString[index]);
                    if (isDigit && endDigitIndex == -1)
                    {
                        // the first digit seen from the right is the last digit of the run
                        endDigitIndex = index + 1;
                    }
                    else if (!isDigit && endDigitIndex != -1)
                    {
                        // the first non-digit after the run marks where the run begins
                        startDigitIndex = index + 1;
                        break;
                    }
                }
                // the run reaches the beginning of the analyzed range
                if (endDigitIndex != -1 && startDigitIndex == -1)
                {
                    startDigitIndex = rangeStart;
                }
            }
            else
            {
                // scan forwards from the beginning of the range
                for (int index = rangeStart; index < rangeEnd; index++)
                {
                    bool isDigit = char.IsDigit(sourceString[index]);
                    if (isDigit && startDigitIndex == -1)
                    {
                        // the first digit seen from the left is the first digit of the run
                        startDigitIndex = index;
                    }
                    else if (!isDigit && startDigitIndex != -1)
                    {
                        // the first non-digit after the run marks where the run ends
                        endDigitIndex = index;
                        break;
                    }
                }
                // the run reaches the end of the analyzed range
                if (startDigitIndex != -1 && endDigitIndex == -1)
                {
                    endDigitIndex = rangeEnd;
                }
            }

            // if digit is not found
            if (startDigitIndex == -1)
            {
                return null;
            }

            // return the text search result: start of the run and its length
            return new Vintasoft.Imaging.Text.TextSearchResult(
                startDigitIndex, endDigitIndex - startDigitIndex);
        }
    }
    
    

    Иерархия наследования

    System.Object
       Vintasoft.Imaging.Text.TextSearchEngine

    Требования

    Целевые платформы: .NET 9; .NET 8; .NET 7; .NET 6; .NET Framework 4.8, 4.7, 4.6, 4.5, 4.0, 3.5

    Смотрите также