VintaSoft Imaging .NET SDK 14.0: Документация для .NET разработчика
Vintasoft.Imaging.Text Namespace / TextSearchEngine Class
Члены типа Объект Синтаксис Пример Иерархия Требования Смотрите также
В этом разделе
    Класс TextSearchEngine
    В этом разделе
    Представляет абстрактный базовый класс, определяющий алгоритм текстового поиска.
    Объектная модель
    TextSearchEngine
    Синтаксис
    'Declaration
    
    Public MustInherit Class TextSearchEngine
    
    
    public abstract class TextSearchEngine
    
    
    public __gc abstract class TextSearchEngine
    
    
    public ref class TextSearchEngine abstract
    
    
    Пример

    Ниже приведён код на VB.NET и C#, который демонстрирует, как создать собственный механизм текстового поиска для поиска цифр на PDF-странице.

    
    ''' <summary>
    ''' Writes to the console every digit sequence found in the text of each page of a PDF document.
    ''' </summary>
    ''' <param name="document">PDF document whose pages are scanned for digits.</param>
    Public Sub SearchDigitsInTextOfPdfDocumentUsingTextSearchEngine(document As Vintasoft.Imaging.Pdf.PdfDocument)
        System.Console.WriteLine("Searching the digits in text of PDF document.")

        For pageIndex As Integer = 0 To document.Pages.Count - 1
            ' collect all digit regions of the current page
            Dim foundRegions As Vintasoft.Imaging.Text.TextRegion() = AdvancedDigitsSearchOnPdfPage(document.Pages(pageIndex))
            If foundRegions Is Nothing Then
                Continue For
            End If

            ' report text and bounding rectangle of each region
            For Each region As Vintasoft.Imaging.Text.TextRegion In foundRegions
                Dim line As String = String.Format("- Text={0}, Rectangle={1}", region.TextContent, region.Rectangle)
                System.Console.WriteLine(line)
            Next
        Next

        System.Console.WriteLine("Searching the digits in text of PDF document is finished.")
    End Sub
    
    ''' <summary>
    ''' Collects all text regions of a PDF page that are matched by <c>DigitsSearchEngine</c>.
    ''' </summary>
    ''' <param name="page">PDF page where digits should be searched.</param>
    ''' <returns>An array of the matched text regions; the array is empty if nothing was found.</returns>
    Public Function AdvancedDigitsSearchOnPdfPage(page As Vintasoft.Imaging.Pdf.Tree.PdfPage) As Vintasoft.Imaging.Text.TextRegion()
        Dim foundRegions As New System.Collections.Generic.List(Of Vintasoft.Imaging.Text.TextRegion)()
        Dim searchEngine As New DigitsSearchEngine()
        Dim searchPosition As Integer = 0

        ' first search attempt
        ' NOTE(review): searchPosition is presumably passed ByRef and set to the index of the match - confirm against the FindText API
        Dim match As Vintasoft.Imaging.Text.TextRegion = page.TextRegion.FindText(searchEngine, searchPosition, False)
        While match IsNot Nothing
            ' remember the match
            foundRegions.Add(match)
            ' shift the start index past the matched text
            searchPosition += match.TextContent.Length
            ' look for the next match
            match = page.TextRegion.FindText(searchEngine, searchPosition, False)
        End While

        Return foundRegions.ToArray()
    End Function
    
    ''' <summary>
    ''' Text search engine that finds runs of consecutive digits in text of PDF page.
    ''' </summary>
    Private Class DigitsSearchEngine
        Inherits Vintasoft.Imaging.Text.TextSearchEngine

        ''' <summary>
        ''' Searches the first run of consecutive digits inside the analyzed window
        ''' [startIndex, startIndex + length) of the source string.
        ''' </summary>
        ''' <param name="sourceString">Source string (string of PDF page) where text must be searched.</param>
        ''' <param name="startIndex">The zero-based index, in the sourceString, of the first character of the analyzed window.</param>
        ''' <param name="length">The number of characters, in the sourceString, to analyze.</param>
        ''' <param name="rightToLeft">
        ''' Indicates that the window is scanned from its last character towards its first character,
        ''' i.e. the digit run closest to the end of the window is returned.
        ''' </param>
        ''' <returns>
        ''' Vintasoft.Imaging.Text.TextSearchResult object, created from the start index and
        ''' the length of the found digit run, if digits are found; otherwise, Nothing.
        ''' A digit run that crosses a window boundary is clipped to the window.
        ''' </returns>
        Public Overrides Function Find(sourceString As String, startIndex As Integer, length As Integer, rightToLeft As Boolean) As Vintasoft.Imaging.Text.TextSearchResult
            ' analyzed window is [windowStart, windowEnd)
            Dim windowStart As Integer = startIndex
            Dim windowEnd As Integer = startIndex + length
            ' found digit run is [startDigitIndex, endDigitIndex); -1 means "not found yet"
            Dim startDigitIndex As Integer = -1
            Dim endDigitIndex As Integer = -1

            If rightToLeft Then
                ' searching from the right to the left: the scan must stop at the window start,
                ' not at the beginning of the string, otherwise text outside the window is matched
                For index As Integer = windowEnd - 1 To windowStart Step -1
                    Dim isDigit As Boolean = Char.IsDigit(sourceString(index))
                    If isDigit AndAlso endDigitIndex = -1 Then
                        ' right-most digit of the run
                        endDigitIndex = index + 1
                    ElseIf Not isDigit AndAlso endDigitIndex <> -1 Then
                        ' first non-digit to the left of the run
                        startDigitIndex = index + 1
                        Exit For
                    End If
                Next
                ' run reaches the left boundary of the window
                If endDigitIndex <> -1 AndAlso startDigitIndex = -1 Then
                    startDigitIndex = windowStart
                End If
            Else
                ' searching from the left to the right
                For index As Integer = windowStart To windowEnd - 1
                    Dim isDigit As Boolean = Char.IsDigit(sourceString(index))
                    If isDigit AndAlso startDigitIndex = -1 Then
                        ' left-most digit of the run
                        startDigitIndex = index
                    ElseIf Not isDigit AndAlso startDigitIndex <> -1 Then
                        ' first non-digit to the right of the run
                        endDigitIndex = index
                        Exit For
                    End If
                Next
                ' run reaches the right boundary of the window
                If startDigitIndex <> -1 AndAlso endDigitIndex = -1 Then
                    endDigitIndex = windowEnd
                End If
            End If

            ' if digit is not found
            If startDigitIndex = -1 Then
                Return Nothing
            End If

            ' return the text search result
            Return New Vintasoft.Imaging.Text.TextSearchResult(startDigitIndex, endDigitIndex - startDigitIndex)
        End Function
    End Class
    
    
    
    /// <summary>
    /// Writes to the console every digit sequence found in the text of each page of a PDF document.
    /// </summary>
    /// <param name="document">PDF document whose pages are scanned for digits.</param>
    public void SearchDigitsInTextOfPdfDocumentUsingTextSearchEngine(Vintasoft.Imaging.Pdf.PdfDocument document)
    {
        System.Console.WriteLine("Searching the digits in text of PDF document.");

        for (int pageIndex = 0; pageIndex < document.Pages.Count; pageIndex++)
        {
            // collect all digit regions of the current page
            Vintasoft.Imaging.Text.TextRegion[] foundRegions =
                AdvancedDigitsSearchOnPdfPage(document.Pages[pageIndex]);
            if (foundRegions == null)
                continue;

            // report text and bounding rectangle of each region
            foreach (Vintasoft.Imaging.Text.TextRegion region in foundRegions)
            {
                string line = string.Format("- Text={0}, Rectangle={1}",
                    region.TextContent,
                    region.Rectangle);
                System.Console.WriteLine(line);
            }
        }

        System.Console.WriteLine("Searching the digits in text of PDF document is finished.");
    }
    
    /// <summary>
    /// Collects all text regions of a PDF page that are matched by <c>DigitsSearchEngine</c>.
    /// </summary>
    /// <param name="page">PDF page where digits should be searched.</param>
    /// <returns>An array of the matched text regions; the array is empty if nothing was found.</returns>
    public Vintasoft.Imaging.Text.TextRegion[] AdvancedDigitsSearchOnPdfPage(
        Vintasoft.Imaging.Pdf.Tree.PdfPage page)
    {
        System.Collections.Generic.List<Vintasoft.Imaging.Text.TextRegion> foundRegions =
            new System.Collections.Generic.List<Vintasoft.Imaging.Text.TextRegion>();
        DigitsSearchEngine searchEngine = new DigitsSearchEngine();
        int searchPosition = 0;

        // first search attempt
        // NOTE(review): FindText presumably sets searchPosition to the index of the match - confirm against the API
        Vintasoft.Imaging.Text.TextRegion match =
            page.TextRegion.FindText(searchEngine, ref searchPosition, false);
        while (match != null)
        {
            // remember the match
            foundRegions.Add(match);
            // shift the start index past the matched text
            searchPosition += match.TextContent.Length;
            // look for the next match
            match = page.TextRegion.FindText(searchEngine, ref searchPosition, false);
        }

        return foundRegions.ToArray();
    }
    
    /// <summary>
    /// Text search engine that finds runs of consecutive digits in text of PDF page.
    /// </summary>
    class DigitsSearchEngine : Vintasoft.Imaging.Text.TextSearchEngine
    {

        /// <summary>
        /// Searches the first run of consecutive digits inside the analyzed window
        /// [startIndex, startIndex + length) of the source string.
        /// </summary>
        /// <param name="sourceString">Source string (string of PDF page) where text must be searched.</param>
        /// <param name="startIndex">The zero-based index, in the sourceString, of the first character of the analyzed window.</param>
        /// <param name="length">The number of characters, in the sourceString, to analyze.</param>
        /// <param name="rightToLeft">
        /// Indicates that the window is scanned from its last character towards its first character,
        /// i.e. the digit run closest to the end of the window is returned.
        /// </param>
        /// <returns>
        /// Vintasoft.Imaging.Text.TextSearchResult object, created from the start index and
        /// the length of the found digit run, if digits are found; otherwise, null.
        /// A digit run that crosses a window boundary is clipped to the window.
        /// </returns>
        public override Vintasoft.Imaging.Text.TextSearchResult Find(
            string sourceString, int startIndex, int length, bool rightToLeft)
        {
            // analyzed window is [windowStart, windowEnd)
            int windowStart = startIndex;
            int windowEnd = startIndex + length;
            // found digit run is [startDigitIndex, endDigitIndex); -1 means "not found yet"
            int startDigitIndex = -1;
            int endDigitIndex = -1;

            if (rightToLeft)
            {
                // searching from the right to the left: the scan must stop at the window start,
                // not at the beginning of the string, otherwise text outside the window is matched
                for (int index = windowEnd - 1; index >= windowStart; index--)
                {
                    bool isDigit = char.IsDigit(sourceString[index]);
                    if (isDigit && endDigitIndex == -1)
                    {
                        // right-most digit of the run
                        endDigitIndex = index + 1;
                    }
                    else if (!isDigit && endDigitIndex != -1)
                    {
                        // first non-digit to the left of the run
                        startDigitIndex = index + 1;
                        break;
                    }
                }
                // run reaches the left boundary of the window
                if (endDigitIndex != -1 && startDigitIndex == -1)
                    startDigitIndex = windowStart;
            }
            else
            {
                // searching from the left to the right
                for (int index = windowStart; index < windowEnd; index++)
                {
                    bool isDigit = char.IsDigit(sourceString[index]);
                    if (isDigit && startDigitIndex == -1)
                    {
                        // left-most digit of the run
                        startDigitIndex = index;
                    }
                    else if (!isDigit && startDigitIndex != -1)
                    {
                        // first non-digit to the right of the run
                        endDigitIndex = index;
                        break;
                    }
                }
                // run reaches the right boundary of the window
                if (startDigitIndex != -1 && endDigitIndex == -1)
                    endDigitIndex = windowEnd;
            }

            // if digit is not found
            if (startDigitIndex == -1)
                return null;

            // return the text search result
            return new Vintasoft.Imaging.Text.TextSearchResult(
                startDigitIndex, endDigitIndex - startDigitIndex);
        }
    }
    
    

    Иерархия наследования

    System.Object
       Vintasoft.Imaging.Text.TextSearchEngine

    Требования

    Целевые платформы: .NET 8; .NET 7; .NET 6; .NET Framework 4.8, 4.7, 4.6, 4.5, 4.0, 3.5

    Смотрите также