@@ -24,6 +24,8 @@ import (
2424 "sync/atomic"
2525 "time"
2626
27+ "github.com/VictoriaMetrics/fastcache"
28+ "github.com/bits-and-blooms/bitset"
2729 segment "github.com/blugelabs/bluge_segment_api"
2830
2931 "github.com/RoaringBitmap/roaring"
@@ -53,6 +55,8 @@ type Writer struct {
5355 asyncTasks sync.WaitGroup
5456
5557 closeOnce sync.Once
58+
59+ cache atomic.Pointer [fastcache.Cache ]
5660}
5761
5862func OpenWriter (config Config ) (* Writer , error ) {
@@ -63,6 +67,10 @@ func OpenWriter(config Config) (*Writer, error) {
6367 closeCh : make (chan struct {}),
6468 }
6569
70+ if config .CacheMaxBytes > 0 {
71+ rv .cache .Store (fastcache .New (config .CacheMaxBytes ))
72+ }
73+
6674 // start the requested number of analysis workers
6775 for i := 0 ; i < config .NumAnalysisWorkers ; i ++ {
6876 config .GoFunc (func () {
@@ -195,10 +203,19 @@ func (s *Writer) fireAsyncError(err error) {
195203func (s * Writer ) Close () (err error ) {
196204 s .closeOnce .Do (func () {
197205 err = s .close ()
206+ s .ResetCache ()
198207 })
199208 return err
200209}
201210
211+ func (s * Writer ) ResetCache () {
212+ c := s .cache .Load ()
213+ if c != nil {
214+ c .Reset ()
215+ s .cache .Store (nil )
216+ }
217+ }
218+
202219func (s * Writer ) close () (err error ) {
203220 startTime := time .Now ()
204221 defer func () {
@@ -296,38 +313,76 @@ func (s *Writer) Batch(batch *Batch) (err error) {
296313 return err
297314}
298315
316+ var id = "_id"
317+
299318func (s * Writer ) removeExistingDocuments (batch * Batch ) error {
319+ if len (batch .unparsedIDs ) == 0 {
320+ return nil
321+ }
322+
300323 root := s .currentSnapshot ()
301324 defer func () { _ = root .Close () }()
325+ removeIDMap := bitset .New (uint (len (batch .unparsedIDs )))
302326
327+ var dict segment.Dictionary
328+ var err error
303329 for _ , seg := range root .segment {
304- dict , err := seg .segment .Dictionary (batch .unparsedIDs [0 ].Field ())
305- if err != nil {
306- return err
307- }
308-
309- for i := 0 ; i < len (batch .unparsedIDs ); i ++ {
310- if ok , _ := dict .Contains (batch .unparsedIDs [i ].Term ()); ! ok {
330+ dict = nil
331+ ff := seg .segment .Fields ()
332+ for i := uint (0 ); i < uint (len (batch .unparsedIDs )); i ++ {
333+ if removeIDMap .Test (i ) {
311334 continue
312335 }
336+ idTerm := batch .unparsedIDs [i ].Term ()
337+ c := s .cache .Load ()
338+ if c != nil {
339+ if ! c .Has (idTerm ) {
340+ if dict == nil {
341+ dict , err = seg .segment .Dictionary (id )
342+ if err != nil {
343+ return err
344+ }
345+ }
346+ if ok , _ := dict .Contains (idTerm ); ! ok {
347+ continue
348+ }
349+ c .Set (idTerm , nil )
350+ }
351+ } else {
352+ if dict == nil {
353+ dict , err = seg .segment .Dictionary (id )
354+ if err != nil {
355+ return err
356+ }
357+ }
358+ if ok , _ := dict .Contains (idTerm ); ! ok {
359+ continue
360+ }
361+ }
362+
313363 fn := batch .fieldNames [i ]
314364 if len (fn ) > 0 {
315- if anyItemNotExist (fn , seg . segment . Fields () ) {
365+ if anyItemNotExist (fn , ff ) {
316366 continue
317367 }
318368 }
319- batch .unparsedDocuments = append (batch .unparsedDocuments [:i ], batch .unparsedDocuments [i + 1 :]... )
320- batch .unparsedIDs = append (batch .unparsedIDs [:i ], batch .unparsedIDs [i + 1 :]... )
321- batch .fieldNames = append (batch .fieldNames [:i ], batch .fieldNames [i + 1 :]... )
322- i --
323- if len (batch .unparsedDocuments ) == 0 {
369+ removeIDMap .Set (i )
370+ if removeIDMap .All () {
324371 return nil
325372 }
326373 }
327374 }
328- if len (batch .unparsedDocuments ) > 0 {
329- batch .documents = append (batch .documents , batch .unparsedDocuments ... )
330- batch .ids = append (batch .ids , batch .unparsedIDs ... )
375+ if removeIDMap .Any () {
376+ for i := uint (0 ); i < uint (len (batch .unparsedIDs )); i ++ {
377+ if removeIDMap .Test (i ) {
378+ continue
379+ }
380+ batch .documents = append (batch .documents , batch .unparsedDocuments [i ])
381+ batch .ids = append (batch .ids , batch .unparsedIDs [i ])
382+ }
383+ } else {
384+ batch .documents = batch .unparsedDocuments
385+ batch .ids = batch .unparsedIDs
331386 }
332387 return nil
333388}
0 commit comments