// Empty stub: accepts `count` and has no body, so calling it does nothing.
function readOnly(count){ }
Starting November 20, the site will be set to read-only. On December 4, 2023,
forum discussions will move to the Trailblazer Community.
+ Start a Discussion
Sandhya K 10Sandhya K 10 

Batch Apex to Delete Duplicate records with same Email id

I have a custom object Book3__c, in which I have the records below.

Name          Email__c
Book1    hh@gmail.com
Book2    hh@gmail.com

Book3    gg@gmail.com
Book4    gg@gmail.com

Book5    kk@gmail.com
Book6    kk@gmail.com
Book7    kk@gmail.com

The problem with the code below is that it deletes all the records, but I need to keep one record per email and delete the rest. What am I missing here?
 
Global class removeDuplicateRecords implements Database.Batchable<SObject> , Database.Stateful {
    // Batch job intended to remove Book3__c records that share an Email__c value.
    // As written it puts every record of the scope into the delete list (see execute()).
    
    // Never assigned or read: execute() declares a local variable with the same
    // name, which hides this field.
    global Set<String> emailstring;
    
    // Selects every Book3__c record that has a non-null Email__c.
    global Database.QueryLocator start(Database.BatchableContext BC){
      return Database.getQueryLocator([Select Email__c from Book3__c where Email__c != null]);
      }
    
    // Collects the emails in the scope, re-queries all Book3__c records carrying
    // those emails, and deletes every re-queried record whose email equals the
    // email of some scope record.
    global void execute(Database.BatchableContext BC , List<Book3__c> scope){
        
        // Local set; hides the class-level field of the same name.
        Set<String> emailstring = new Set<String>();
        
      //  Map<String , Book3__c> EmailBookmap = new Map<String , Book3__c>();
        for(Book3__c s : scope){
        // EmailBookmap.put(s.Email__c , s);
            emailstring.add(s.Email__c);
            
        }  
       // system.debug(EmailBookmap);
        system.debug(emailstring);
        
        // The filter only matches on email, so the result also contains the
        // scope records themselves, not just their duplicates.
        List<Book3__c> bklst = [select Email__c from Book3__c where Email__c In :emailstring ];
        
        List<Book3__c> duplicatelist = new List<Book3__c>();
        
        // Every scope record is compared with every re-queried record. Because each
        // scope record is also present in bklst, it matches itself, so no record is
        // spared. A record is added once per scope record sharing its email, so the
        // same record can appear in duplicatelist several times.
        for(Book3__c bk : Scope){
        for(Book3__c b :bklst){
           // if(emailstring.contains(b.Email__c)){
            if(bk.email__c == b.email__c){
              duplicatelist.add(b);  
            }
            else{
             // No effect: this email was already added to the set in the first loop.
             emailstring.add(bk.Email__c);   
            }
        }
        }
        
       // system.debug(EmailBookmap);
        system.debug(duplicatelist);
      delete duplicatelist;
    }
    
    // No post-processing.
    global void finish(Database.BatchableContext BC){
        
    }
        

}

 
Best Answer chosen by Sandhya K 10
Tarun J.Tarun J.
Update first line as below:
 
global class removeDuplicateRecords implements Database.Batchable<SObject> , Database.Stateful {

-Thanks,
TK

All Answers

Tarun J.Tarun J.
Hello Sandhya,

Try this:
 
global class removeDuplicateRecords implements Database.Batchable<SObject> , Database.Stateful {
    // Batch job: within each execute() scope, keeps the first Book3__c seen per Email__c and deletes the rest.
    global Set<String> emailstring; // never assigned or read; execute() declares a local of the same name
    
    global Database.QueryLocator start(Database.BatchableContext BC){
      return Database.getQueryLocator([Select Email__c from Book3__c where Email__c != null]);
      }
    // Splits the scope into first-seen records (kept in EmailBookmap) and repeats (collected and deleted).
    global void execute(Database.BatchableContext BC , List<Book3__c> scope){
        
        Set<String> emailstring = new Set<String>(); // only referenced by the commented-out block below
		List<Book3__c> duplicatelist = new List<Book3__c>();
        // EmailBookmap is created anew on every execute() call, so a repeat whose first occurrence was in an earlier scope is not detected.
        Map<String , Book3__c> EmailBookmap = new Map<String , Book3__c>();
        for(Book3__c s : scope){
			if(!EmailBookmap.containsKey(s.Email__c)){
				EmailBookmap.put(s.Email__c , s);
			}
			else{
				duplicatelist.add(s);			
			}                        
        }  
       // system.debug(EmailBookmap);
        /*system.debug(emailstring);
        
        List<Book3__c> bklst = [select Email__c from Book3__c where Email__c In :emailstring ];
        
		for(Book3__c bk : Scope){
        for(Book3__c b :bklst){
           // if(emailstring.contains(b.Email__c)){
            if(bk.email__c == b.email__c){
              duplicatelist.add(b);  
            }
            else{
             emailstring.add(bk.Email__c);   
            }
        }
        }*/
        
       // system.debug(EmailBookmap);
        system.debug(duplicatelist);
		if(duplicatelist.size() > 0){
			delete duplicatelist;
		}
    }
    
    global void finish(Database.BatchableContext BC){
        // No post-processing.
    }
        

}

-Thanks,
TK

Did this answer your question? If not, let me know what didn't work, or if so, please mark it solved.
Sandhya K 10Sandhya K 10
Hi Tarun , 

The above code deletes the duplicate records. The problem is that I have 11 records in total and set the batch size to 3. Since each batch execution is treated as a separate transaction, I am still seeing a few duplicate records. How do I resolve this?
Tarun J.Tarun J.
Comment out line #14 (the local EmailBookmap declaration inside execute) and add the code below at line #4 (as a class-level field), then try again:
 
global Map<String , Book3__c> EmailBookmap = new Map<String , Book3__c>();

-Thanks,
TK

Did this answer your question? If not, let me know what didn't work, or if so, please mark it solved.
Sandhya K 10Sandhya K 10
Hi Tarun,

Records before execution | Records after execution
Sandhya K 10Sandhya K 10
I am still seeing a few duplicate records after changing the Map to global.
Tarun J.Tarun J.
Can you share the updated code?
Sandhya K 10Sandhya K 10
global class removeDuplicateRecords implements Database.Batchable<SObject>  {
    // Batch job that keeps the first Book3__c seen per Email__c and deletes later ones.
    // NOTE(review): this version implements only Database.Batchable, not
    // Database.Stateful. Per the Batch Apex documentation, instance fields are not
    // retained between execute() transactions without Database.Stateful, so
    // EmailBookmap presumably starts out empty for every scope -- confirm.
    
  // First record seen for each email; moved here from a local variable in execute().
  Global Map<String , Book3__c> EmailBookmap = new Map<String , Book3__c>();
    
    // Selects every Book3__c record that has a non-null Email__c.
    global Database.QueryLocator start(Database.BatchableContext BC){
      return Database.getQueryLocator([Select Email__c from Book3__c where Email__c != null]);
      }
    
    
    
    // Records the first occurrence of each email in EmailBookmap and deletes every
    // scope record whose email is already a key of that map.
    global void execute(Database.BatchableContext BC , List<Book3__c> scope){
        
        
		
        
      // Map<String , Book3__c> EmailBookmap = new Map<String , Book3__c>();
        
        List<Book3__c> duplicatelist = new List<Book3__c>();
        for(Book3__c s : scope){
			if(! EmailBookmap.containsKey(s.Email__c)){
				EmailBookmap.put(s.Email__c , s);
			}
			else{
				duplicatelist.add(s);			
			}                        
        }  
               
       
        system.debug(duplicatelist);
		// The delete is skipped when no repeats were found in this scope.
		if(duplicatelist.size() > 0){
			delete duplicatelist;
		}
    }
    
    // No post-processing.
    global void finish(Database.BatchableContext BC){
        
    }
        

}

 
Tarun J.Tarun J.
Update first line as below:
 
global class removeDuplicateRecords implements Database.Batchable<SObject> , Database.Stateful {

-Thanks,
TK
This was selected as the best answer
Sandhya K 10Sandhya K 10
That works, Tarun. Thank you very much!
Polam VishalPolam Vishal
Hi,
We can prevent duplicate email values by selecting the "Do not allow duplicate values" checkbox while creating the field, right?
Praveen Reddy 236Praveen Reddy 236
If we don't use Database.Stateful, the batch will not look at all the records when identifying duplicates, so I used Database.Stateful and am getting a "batchable instance is too big" exception. How can we avoid this exception here?
Pratima SethiPratima Sethi
/**
 * Batch job that deletes Contact records whose Email has already been seen on
 * another Contact processed by this job, keeping the first Contact encountered
 * for each email.
 *
 * The set of seen emails is an instance field, so Database.Stateful carries it
 * from one execute() call to the next and duplicates that fall into different
 * batches are detected as well.
 */
public class DuplicateContactBatch implements Database.Batchable<SObject>, Database.Stateful {

    // Emails encountered so far in this job run. This grows with the number of
    // distinct emails, so very large data volumes can hit the serialized-state
    // limit of a stateful batch. String comparison in a Set is case-sensitive.
    private Set<String> seenEmails = new Set<String>();

    /**
     * Selects every Contact that has an Email value.
     * The filter compares against SOQL null rather than the string literal 'Null',
     * so contacts without an email are left out and can never be treated as
     * duplicates of each other.
     */
    public Database.QueryLocator start(Database.BatchableContext bc) {
        return Database.getQueryLocator([SELECT Id, Email FROM Contact WHERE Email != null]);
    }

    /**
     * Deletes each Contact in the scope whose Email was already seen, either
     * earlier in this scope or in a previous batch.
     */
    public void execute(Database.BatchableContext bc, List<Contact> scope) {
        List<Contact> duplicateContacts = new List<Contact>();
        for (Contact c : scope) {
            // Set.add returns false when the value was already present.
            if (!seenEmails.add(c.Email)) {
                duplicateContacts.add(c);
            }
        }
        if (!duplicateContacts.isEmpty()) {
            delete duplicateContacts;
        }
    }

    /** No post-processing is required. */
    public void finish(Database.BatchableContext bc) {
    }

}