importjsonimportpandasaspdimportnetworkxasnxfromtypingimportTYPE_CHECKINGfrom.utilsimportdetect_separator,find_three_column_format,mtg_loggerifTYPE_CHECKING:from.configimportConfig_logger_=mtg_logger(__name__)# Annotated .cx network related
def read_cyjson(filename: str, direction: bool = False) -> nx.Graph:
    """Read a Cytoscape JSON (``.cyjs``) file into a networkx graph.

    Based on the corresponding function of the manta library:
    https://github.com/ramellose/manta/blob/master/manta/cyjson.py

    Besides the node and edge attributes stored under each element's
    ``"data"`` key, the node layout (the element's ``"position"`` entry) is
    copied into the node attributes under ``"position"`` when it is present.

    In the microbetag framework, it is being used to load the `manta` output.

    Args:
        filename: Filepath to `.cyjs` network file.
        direction: If True, graph is imported as a :class:`networkx.DiGraph`

    Returns:
        A :class:`networkx.Graph` object (a :class:`networkx.DiGraph` when
        ``direction`` is True). Graph-level attributes come from the
        top-level ``"data"`` entry of the file, if any.

    Raises:
        KeyError: If the file lacks ``"elements"``, ``"nodes"`` or ``"edges"``,
            if a node or edge has no ``"data"`` entry, or if an edge has no
            ``"source"`` / ``"target"``.
    """
    with open(filename, encoding="utf-8") as f:
        data = json.load(f)

    graph = nx.DiGraph() if direction else nx.Graph()
    # A missing (or null) top-level "data" entry means no graph attributes.
    graph.graph = dict(data.get("data") or {})

    generated_id = 0
    for element in data["elements"]["nodes"]:
        # 'value' key is not included in CoNet output;
        # the graph only needs ID and name values.
        node_data = element["data"].copy()

        # Files exported without a layout carry no "position" entry.
        if "position" in element:
            node_data["position"] = element["position"]

        # dict.get() never raises KeyError, so the missing-id case has to be
        # checked explicitly; None is not a valid networkx node.
        node = element["data"].get("id")
        if node is None:
            # If no index is found, one is generated
            node = generated_id
            generated_id += 1

        graph.add_node(node)
        graph.nodes[node].update(node_data)

    for element in data["elements"]["edges"]:
        # "source" and "target" are kept among the edge attributes as well.
        edge_data = element["data"].copy()
        sour = edge_data["source"]
        targ = edge_data["target"]
        graph.add_edge(sour, targ)
        graph.edges[sour, targ].update(edge_data)

    return graph
def get_edgelist(network_file: str) -> pd.DataFrame:
    """Load a 3-column network file as a :class:`pandas.DataFrame`.

    The separator is obtained from ``detect_separator``. The pair returned by
    ``find_three_column_format`` is used as the line number from which
    parsing starts (passed to pandas as ``skiprows = line number - 1``) and
    as the ``header`` argument of :func:`pandas.read_csv`.

    Args:
        network_file: Filepath to the edgelist.

    Returns:
        A 3-column pandas.DataFrame whose columns are renamed to
        ``node_A``, ``node_B`` and ``microbetag::weight``.
    """
    sep = detect_separator(network_file)
    first_line, header_row = find_three_column_format(network_file, sep)

    frame = pd.read_csv(
        network_file,
        sep=sep,
        skiprows=first_line - 1,
        header=header_row,
    )
    frame.columns = ["node_A", "node_B", "microbetag::weight"]
    return frame
def build_base_graph(conf: "Config") -> dict:
    """Build a non-annotated graph in the `.cyjs` layout.

    Only the edge scores and the node identifiers of the network are used.
    To be used only when manta clustering has been asked.

    Args:
        conf: A utils.Config instance. The attributes read here are
            ``network``, ``seq_ids`` and ``seq_to_taxon_df``.

    Returns:
        The base network as a dictionary with an ``"elements"`` entry
        (``"nodes"`` and ``"edges"`` lists) and a ``"data"`` entry
        (``"title"`` and ``"tags"``).

    Note:
        Runs if network clustering has been asked for from the user,
        converting the initial .csv edgelist to .cyjs since `manta` gets a
        .cyjs input file.
    """
    edge_table = get_edgelist(conf.network)
    edge_table.columns = ["node_A", "node_B", "microbetag::weight"]

    node_entries = []
    edge_entries = []
    seen_names = set()

    records = edge_table.to_dict(orient="records")
    for edge_id, record in enumerate(records, start=1):
        source_name = record["node_A"]
        target_name = record["node_B"]

        # Register each endpoint once, in source-then-target order.
        for endpoint in (source_name, target_name):
            endpoint_is_taxon = endpoint in conf.seq_ids
            if endpoint in seen_names:
                continue
            seen_names.add(endpoint)
            node_entries.append(
                _build_a_base_node(
                    endpoint, conf.seq_to_taxon_df, endpoint_is_taxon
                )
            )

        # The shared name joins the last ';'-separated field of each endpoint.
        short_names = (
            source_name.split(";")[-1],
            target_name.split(";")[-1],
        )
        edge_entries.append(
            {
                "data": {
                    "id": str(edge_id),
                    "source": source_name,
                    "target": target_name,
                    "selected": False,
                    "shared_name": "-".join(short_names),
                    "SUID": str(edge_id),
                    "name": "co-occurrence",
                    "weight": float(record["microbetag::weight"]),
                },
                "selected": False,
            }
        )

    # Ensure .cyjs format
    return {
        "elements": {"nodes": node_entries, "edges": edge_entries},
        "data": {
            "title": "microbetag annotated microbial co-occurrence network",
            "tags": ["v1.0"],
        },
    }
def _build_a_base_node(node_name, map_seq, is_taxon: bool) -> dict:
    """Build a node for the base network.

    Args:
        node_name: Identifier stored as the node's ``id``.
        map_seq: Currently unused.
        is_taxon: Currently unused.

    Returns:
        A dictionary with a single ``"data"`` entry holding the node ``id``
        and ``selected`` set to False.
    """
    # NOTE: the taxonomy / GTDB-representative annotation that would consume
    # `map_seq` and `is_taxon` is disabled, so both arguments are accepted
    # only to keep the call signature stable.
    return {"data": {"id": node_name, "selected": False}}