IFieldExtractor

IFieldExtractor interface

Provides methods for extracting fields from a document.

public interface IFieldExtractor

Properties

Name Description
Extensions { get; } Gets the supported extensions.

Methods

Name Description
GetFields(Stream) Extracts all fields from the specified document.
GetFields(string) Extracts all fields from the specified document.

Remarks

Learn more

Examples

The following example demonstrates how to implement the IFieldExtractor interface.

/// <summary>
/// Sample <c>IFieldExtractor</c> implementation for ".log" files.
/// For a file path it produces the "FileName", "CreationDate" and "Content" fields;
/// for a stream it produces only the "Content" field.
/// </summary>
public class LogExtractor : IFieldExtractor
{
    // Number of leading characters stripped from every line before it is added to the content.
    // NOTE(review): presumably the width of a fixed per-line prefix such as a timestamp - confirm
    // against the actual log format.
    private const int LinePrefixLength = 12;

    private readonly string[] extensions = new string[] { ".log" };

    /// <summary>
    /// Gets the file extensions handled by this extractor (".log").
    /// </summary>
    public string[] Extensions
    {
        get { return extensions; }
    }

    /// <summary>
    /// Extracts the fields of the log file located at <paramref name="filePath"/>.
    /// </summary>
    /// <param name="filePath">Path to the log file.</param>
    /// <returns>
    /// The "FileName" (full path), "CreationDate" (invariant-culture string) and "Content" fields.
    /// </returns>
    public DocumentField[] GetFields(string filePath)
    {
        FileInfo fileInfo = new FileInfo(filePath);
        DocumentField[] fields = new DocumentField[]
        {
            new DocumentField("FileName", fileInfo.FullName),
            new DocumentField("CreationDate", fileInfo.CreationTime.ToString(CultureInfo.InvariantCulture)),
            new DocumentField("Content", ExtractContent(filePath)),
        };
        return fields;
    }

    /// <summary>
    /// Extracts the fields of a log document supplied as a stream.
    /// Implemented because the interface summary lists a GetFields(Stream) member.
    /// NOTE(review): the return type is assumed to match the string overload - confirm
    /// against the interface declaration.
    /// </summary>
    /// <param name="stream">Stream positioned at the start of the log text.</param>
    /// <returns>Only the "Content" field; a stream carries no file name or creation time.</returns>
    public DocumentField[] GetFields(Stream stream)
    {
        // The reader is intentionally not disposed: disposing it would also close the caller's stream.
        StreamReader streamReader = new StreamReader(stream);
        return new DocumentField[]
        {
            new DocumentField("Content", ExtractContent(streamReader)),
        };
    }

    // Opens the file as text and returns its processed content.
    private static string ExtractContent(string filePath)
    {
        using (StreamReader streamReader = File.OpenText(filePath))
        {
            return ExtractContent(streamReader);
        }
    }

    // Reads every line from the reader, strips the fixed-width prefix and joins the results.
    // An empty input yields an empty string; a line shorter than the prefix contributes an
    // empty line instead of throwing ArgumentOutOfRangeException.
    private static string ExtractContent(TextReader reader)
    {
        StringBuilder result = new StringBuilder();
        string line;
        // ReadLine returns null once the end of the input is reached.
        while ((line = reader.ReadLine()) != null)
        {
            string processedLine = line.Length >= LinePrefixLength
                ? line.Remove(0, LinePrefixLength)
                : string.Empty;
            result.AppendLine(processedLine);
        }
        return result.ToString();
    }
}

The following example demonstrates how to use the custom extractor for indexing.

// Folder in which the index is stored
string indexPath = @"c:\MyIndex\";
// Folder holding the documents to be searched
string sourcePath = @"c:\MyDocuments\";

// Create a new index in the index folder, or load the existing one
Index searchIndex = new Index(indexPath);

// Register the custom extractor in the index settings before any documents are added
LogExtractor logExtractor = new LogExtractor();
searchIndex.IndexSettings.CustomExtractors.Add(logExtractor);

// Index the documents from the source folder
searchIndex.Add(sourcePath);

See Also