class CobolSegmenter(CodeSegmenter):
    """Code segmenter for `COBOL`.

    The segmenter works line by line on ``self.source_lines`` and recognises
    divisions, sections and paragraphs with regular-expression heuristics;
    it does not parse COBOL. All matching is case-insensitive.

    NOTE(review): ``self.source_lines`` is read by every method below but is
    not assigned anywhere in this class as shown; presumably the initializer
    populates it with the program split into lines -- confirm.
    """

    # Paragraph header token: letters/digits/hyphens, optional trailing text,
    # terminated by a period (e.g. ``MAIN-PARA.``).
    PARAGRAPH_PATTERN: Pattern = re.compile(
        r"^[A-Z0-9\-]+(\s+.*)?\.$", re.IGNORECASE
    )
    # Any of the four division headers, optionally indented.
    DIVISION_PATTERN: Pattern = re.compile(
        r"^\s*(IDENTIFICATION|DATA|PROCEDURE|ENVIRONMENT)\s+DIVISION.*$",
        re.IGNORECASE,
    )
    # Section header such as ``WORKING-STORAGE SECTION.``. The final period is
    # escaped so that only a literal "." (not any character) ends the header.
    SECTION_PATTERN: Pattern = re.compile(
        r"^\s*[A-Z0-9\-]+\s+SECTION\.$", re.IGNORECASE
    )

    # Division names that ``simplify_code`` keeps as headers. Note that
    # ENVIRONMENT DIVISION is accepted by DIVISION_PATTERN but is not treated
    # as a header by ``simplify_code``.
    _HEADER_DIVISIONS = (
        "PROCEDURE DIVISION",
        "DATA DIVISION",
        "IDENTIFICATION DIVISION",
    )

    def is_valid(self) -> bool:
        """Return True if at least one source line is a division header."""
        # Identify presence of any division to validate COBOL code
        return any(
            self.DIVISION_PATTERN.match(line) for line in self.source_lines
        )

    def _extract_code(self, start_idx: int, end_idx: int) -> str:
        """Return source lines ``[start_idx, end_idx)`` joined by newlines.

        Trailing newline characters are stripped from the joined text.
        """
        return "\n".join(self.source_lines[start_idx:end_idx]).rstrip("\n")

    def _is_relevant_code(self, line: str) -> bool:
        """Check if a line is part of the procedure division or a relevant section.

        Currently only a line containing ``PROCEDURE DIVISION`` (in any
        letter case) counts as relevant.
        """
        # Add additional conditions for relevant sections if needed
        return "PROCEDURE DIVISION" in line.upper()

    def _is_paragraph_or_section(self, line: str) -> bool:
        """Return True if ``line`` looks like a paragraph or section header.

        A paragraph is detected from the first space-separated token of the
        stripped line; a section is detected from the whole stripped line.
        """
        stripped = line.strip()
        # split(" ") always yields at least one item, so [0] is safe even
        # for an empty line.
        first_token = stripped.split(" ")[0]
        return bool(
            self.PARAGRAPH_PATTERN.match(first_token)
            or self.SECTION_PATTERN.match(stripped)
        )

    def _process_lines(self, func: Callable) -> List[str]:
        """A generic function to process COBOL lines based on provided func.

        Scanning starts being effective at the first line for which
        ``_is_relevant_code`` is true. From there on, every paragraph or
        section header opens a new element and closes the previous one.

        Args:
            func: Called as ``func(elements, start_idx, end_idx)`` once per
                element, where ``end_idx`` is exclusive. It is expected to
                record its result in ``elements``.

        Returns:
            The ``elements`` list after all calls to ``func``.
        """
        elements: List[str] = []
        start_idx = None
        inside_relevant_section = False

        for i, line in enumerate(self.source_lines):
            if self._is_relevant_code(line):
                inside_relevant_section = True

            if inside_relevant_section and self._is_paragraph_or_section(line):
                # A new header closes the element that was open so far.
                if start_idx is not None:
                    func(elements, start_idx, i)
                start_idx = i

        # Handle the last element if exists
        if start_idx is not None:
            func(elements, start_idx, len(self.source_lines))

        return elements

    def simplify_code(self) -> str:
        """Return an outline of the program with bodies collapsed.

        Header lines (the listed divisions, sections and paragraphs) are kept
        verbatim. Each run of non-header lines that follows a header is
        replaced by a single ``* OMITTED CODE *`` line. Lines before the
        first header are dropped.

        Division names are compared case-insensitively, in line with
        ``DIVISION_PATTERN`` and ``_is_relevant_code``.
        """
        simplified_lines: List[str] = []
        inside_relevant_section = False
        # Tracks whether "* OMITTED CODE *" was already added after the
        # last header, so each run of body lines yields one marker only.
        omitted_code_added = False

        for line in self.source_lines:
            upper_line = line.upper()
            is_header = any(
                name in upper_line for name in self._HEADER_DIVISIONS
            ) or self._is_paragraph_or_section(line)

            if is_header:
                # Entering a new division/section/paragraph: keep the header
                # and allow a fresh omitted-code marker after it.
                inside_relevant_section = True
                omitted_code_added = False
                simplified_lines.append(line)
            elif inside_relevant_section and not omitted_code_added:
                simplified_lines.append("* OMITTED CODE *")
                omitted_code_added = True

        return "\n".join(simplified_lines)