yaml
type: "io.kestra.plugin.ai.rag.IngestDocument"
Examples
yaml
# Example flow: a single IngestDocument task that embeds one external
# Markdown file and writes the result to a Kestra KV-backed embedding store.
id: document_ingestion
namespace: company.ai
tasks:
  - id: ingest
    type: io.kestra.plugin.ai.rag.IngestDocument
    # Model provider used by the task (Google Gemini here).
    provider:
      type: io.kestra.plugin.ai.provider.GoogleGemini
      modelName: gemini-embedding-exp-03-07
      # API key is read from the Kestra KV store rather than hard-coded.
      apiKey: "{{ kv('GEMINI_API_KEY') }}"
    # Embedding store that receives the ingested document.
    embeddings:
      type: io.kestra.plugin.ai.embeddings.KestraKVStore
    # Task-level flag (default false).
    # NOTE(review): presumably clears the store before ingesting - confirm.
    drop: true
    # Documents to ingest, fetched from external URLs.
    fromExternalURLs:
      - https://raw.githubusercontent.com/kestra-io/docs/refs/heads/main/content/blogs/release-0-24.md
Properties
embeddings *Required Non-dynamic Chroma | Elasticsearch | KestraKVStore | Milvus | MongoDBAtlas | PGVector | Pinecone | Qdrant | Redis | Weaviate
provider *Required Non-dynamic AmazonBedrock | Anthropic | AzureOpenAI | DeepSeek | GoogleGemini | GoogleVertexAI | MistralAI | Ollama | OpenAI
documentSplitter Non-dynamic IngestDocument-DocumentSplitter
drop boolean | string
Default
false
fromExternalURLs array
SubType string
fromInternalURIs array
SubType string
fromPath string
metadata object
SubType string
Outputs
embeddingStoreOutputs object
ingestedDocuments integer
inputTokenCount integer
outputTokenCount integer
totalTokenCount integer
Definitions
Azure OpenAI Model Provider
endpoint *Requiredstring
modelName *Requiredstring
type *Requiredobject
apiKey string
clientId string
clientSecret string
serviceVersion string
tenantId string
PGVector Embedding Store
database *Requiredstring
host *Requiredstring
password *Requiredstring
port *Requiredintegerstring
table *Requiredstring
type *Requiredobject
user *Requiredstring
useIndex booleanstring
Default
false
Qdrant Embedding Store
apiKey *Requiredstring
collectionName *Requiredstring
host *Requiredstring
port *Requiredintegerstring
type *Requiredobject
Google VertexAI Model Provider
endpoint *Requiredstring
location *Requiredstring
modelName *Requiredstring
project *Requiredstring
type *Requiredobject
Google Gemini Model Provider
apiKey *Requiredstring
modelName *Requiredstring
type *Requiredobject
MongoDB Atlas Embedding Store
collectionName *Requiredstring
host *Requiredstring
indexName *Requiredstring
scheme *Requiredstring
type *Requiredobject
createIndex booleanstring
database string
metadataFieldNames array
SubType string
options object
password string
username string
Mistral AI Model Provider
apiKey *Requiredstring
modelName *Requiredstring
type *Requiredobject
baseUrl string
In-memory embedding store that stores data as Kestra KV pairs
type *Requiredobject
kvName string
Default
{{flow.id}}-embedding-store
Chroma Embedding Store
baseUrl *Requiredstring
collectionName *Requiredstring
type *Requiredobject
Redis Embedding Store
host *Requiredstring
port *Requiredintegerstring
type *Requiredobject
indexName string
Default
embedding-index
io.kestra.plugin.ai.embeddings.Elasticsearch-ElasticsearchConnection-BasicAuth
password string
username string
Milvus Embedding Store
token *Requiredstring
type *Requiredobject
autoFlushOnDelete booleanstring
autoFlushOnInsert booleanstring
collectionName string
consistencyLevel string
databaseName string
host string
idFieldName string
indexType string
metadataFieldName string
metricType string
password string
port integerstring
retrieveEmbeddingsOnSearch booleanstring
textFieldName string
uri string
username string
vectorFieldName string
DeepSeek Model Provider
apiKey *Requiredstring
modelName *Requiredstring
type *Requiredobject
baseUrl string
Default
https://api.deepseek.com/v1
Pinecone Embedding Store
apiKey *Requiredstring
cloud *Requiredstring
index *Requiredstring
region *Requiredstring
type *Requiredobject
namespace string
Anthropic AI Model Provider
apiKey *Requiredstring
modelName *Requiredstring
type *Requiredobject
io.kestra.plugin.ai.rag.IngestDocument-DocumentSplitter
maxOverlapSizeInChars *Requiredinteger
maxSegmentSizeInChars *Requiredinteger
splitter string
Default
RECURSIVE
Possible Values
RECURSIVE
PARAGRAPH
LINE
SENTENCE
WORD
Weaviate Embedding Store
apiKey *Requiredstring
host *Requiredstring
type *Requiredobject
avoidDups booleanstring
consistencyLevel string
Possible Values
ONE
QUORUM
ALL
grpcPort integerstring
metadataFieldName string
metadataKeys array
SubType string
objectClass string
port integerstring
scheme string
securedGrpc booleanstring
useGrpcForInserts booleanstring
Ollama Model Provider
endpoint *Requiredstring
modelName *Requiredstring
type *Requiredobject
OpenAI Model Provider
apiKey *Requiredstring
modelName *Requiredstring
type *Requiredobject
baseUrl string
io.kestra.plugin.ai.rag.IngestDocument-InlineDocument
content *Requiredstring
metadata object
io.kestra.plugin.ai.embeddings.Elasticsearch-ElasticsearchConnection
hosts *Requiredarray
SubType string
Min items
1
headers array
SubType string
pathPrefix string
strictDeprecationMode booleanstring
trustAllSsl booleanstring
Elasticsearch Embedding Store
connection *RequiredElasticsearch-ElasticsearchConnection
indexName *Requiredstring
type *Requiredobject
Amazon Bedrock Model Provider
accessKeyId *Requiredstring
modelName *Requiredstring
secretAccessKey *Requiredstring
type *Requiredobject
modelType string
Default
COHERE
Possible Values
COHERE
TITAN