Team:Virginia/Model/Files/cobraGeneric.py



"""
Visit COBRApy and Mackinac github pages for installation instructions

http://cobrapy.readthedocs.io/en/latest/index.html
https://github.com/mmundy42/mackinac
"""

from __future__ import print_function
import cobra
import os
import mackinac
from os.path import join

def boundary_list(model):
    """ 
    		Creates and returns the list of boundary reactions (exchange, demand, and sink-type reactions)
    """
    
    exList = []
    for rxn in model.reactions:
        if (rxn.boundary == True):
            exList.append(rxn)
    return exList

""" Defining parameters """

modelseed_id = '' # Patric/ModelSEED ID of the organism in study. Example: '266.10' - Paracoccus denitrificans
first_run = False # If True, will create and optimize a ModelSEED Model of the selected organism. WARNING: VERY SLOW! Set to False after the first execution of the script. Your data will be saved on ModelSEED website.
validate = False # Perform mass balance 
token_username = '' # Patric or RAST account login
token_password = '' # Patric or RAST account password
perform_fva = True # (bool) Set to True to perform FVA
export_to_escher = True # flag for export of the model to escher-compatible .json file
precision_fva = 0.95 # Fraction of Optimum for FVA. A good choice would be: 1 - (fraction difference between WT and synthetic biomass yields)
rxns_KO = [] # List of knockout reaction ID's. See generated rxnList.txt file for reaction data
simult_KO = [] # List of reactions for simultaneous knockout

# Sets data_dir to current working directory. Make sure the script is saved there.
data_dir = os.getcwd()
print("Script directory: ", data_dir)

# Reactions are written in terms of ModelSEED compounds. Uncomment the four commands below during the first run ONLY, as most data are subsequently saved on ModelSEED server. All Mackinac commands have their corresponding functionality on the website.

if(first_run):
    # Obtain authentication token from ModelSEED website. Requires PATRIC or RAST login. Default token_type argument is 'patric'
    mackinac.get_token(token_username, token_password, token_type='rast')
    
    # Reconstruct one of the existing models on complete media
    mackinac.reconstruct_modelseed_model(modelseed_id)
    
    # Gapfill the reconstructed model on complete media. Customizable: see source code.
    mackinac.gapfill_modelseed_model(modelseed_id)
    
    # Run Flux Balance Analysis on the model. Customizable: see source code.
    mackinac.optimize_modelseed_model(modelseed_id)

    # Convert an existing ModelSEED model to COBRApy model. 'validate=True' optional argument will report common errors with the model, e.g. mass/charge balance errors of reactions. Whether to fix them or not is optional -- they do not affect the FBA calculations.
    MODEL = mackinac.create_cobra_model_from_modelseed_model(modelseed_id, validate=True)
    
    # Creates a .json file of the original model
    cobra.io.save_json_model(MODEL, "initMODEL.json")
    
if(not(first_run)):
    MODEL = cobra.io.load_json_model(join(data_dir, "initMODEL.json"))
    print("Model loaded from an existing file: ", join(data_dir, "initMODEL.json\n"))

if(validate):
    print("---- MODEL VALIDATION BEGINS ----")
    print(cobra.manipulation.validate.check_mass_balance(MODEL))
    print(cobra.manipulation.validate.check_metabolite_compartment_formula(MODEL))
    print(cobra.manipulation.validate.check_reaction_bounds(MODEL))
    print("---- END OF VALIDATION ----")

from cobra import Model, Reaction, Metabolite # Import COBRA classes for the purpose of adding extra reactions and metabolites to the model

# Add precursor (e.g. initially missing) reactions and metabolites here

print('')
print(len(MODEL.reactions), "reactions initially")
print(len(MODEL.metabolites), "metabolites initially")
print(len(MODEL.genes), "genes initially")

# Locally execute FBA on the model.
sol_1 = MODEL.optimize()
print('Unmodified objective value: %3.15f\n' % sol_1.objective_value)

# Performs flux variability analysis (FVA) on the model. The result is a pandas.data_frame that contains the minimum and maximum flux range for exchange reactions within 0.1% of the optimal solution.
rxnList = boundary_list(MODEL)
if(perform_fva):
    from cobra.flux_analysis import flux_variability_analysis
    sol_fva1 = flux_variability_analysis(MODEL, rxnList, fraction_of_optimum=precision_fva, loopless=True)
    fva1_max = sol_fva1['maximum']
    fva1_min = sol_fva1['minimum']

""" Below is a list of compounds and reactions to be added to the model. All the data is available on the ModelSEED website, in the 'Biochemistry' section. It is advisable to stick to the conventions presented there and further in this script when adding your own elements. I.e, don't mess up the names.

# Example: rxn14202 (H2O[c] + 2Q[c] + NH2OH[c] <=> NO2[c] + 2QH2[c]
# cpd00001_c + cpd00165_c + 2cpd11669_c <=> cpd00075_c + 2cpd11665_c
rxn14202_c = Reaction('rxn14202_c')
rxn14202_c.name = 'hydroxylamine:ubiquinone oxidoreductase'
rxn14202_c.subsystem = 'Nitrogen Metabolism'
rxn14202_c.lower_bound = -1000. 
rxn14202_c.upper_bound = 1000. 
rxn14202_c.gene_reaction_rule = "( 915.3.peg.508 )"

rxn14202_c.add_metabolites({ 
    cpd00001_c: -1.0,
    cpd00165_c: -1.0,
    cpd11669_c: -2.0,
    cpd00075_c: 1.0,
    cpd11665_c: 2.0,
})

"""

# Append the above reactions to the model
MODEL.add_reactions([rxn14010_c, rxn00109_c, rxn00570_c, rxn14202_c, rxn06111_c])

# Maybe fixes something, maybe not, I don't know what it does
#   MODEL.repair()

print(len(MODEL.reactions), "reactions")
print(len(MODEL.metabolites), "metabolites")
print(len(MODEL.genes), "genes")

# Writes reaction and compound data into .txt files. The files are saved into the working directory of the script.
f = open(join(os.getcwd(), "rxnList.txt"), 'w')
for rxn in MODEL.reactions:
    f.write('Rxn ID: %s\n' % rxn.id)
    f.write('Rxn: %s\n' % rxn.name)
    f.write('%s\n\n' % rxn.reaction)
f.close()

f = open(join(os.getcwd(), "cpdList.txt"), 'w')
for cpd in MODEL.metabolites:
    f.write('%s (%s) : %s[%s]\n\n' % (cpd.id, cpd.name, cpd.formula, cpd.compartment))
f.close()

# Run FBA on the modified model
sol_2 = MODEL.optimize()
print('Modified objective value: %3.15f\n' % sol_2.objective_value)

# Writes the exchange reaction ("EX_...") fluxes into a file. These reactions, althought not strictly considered "reactions" because they're unbalanced, define the nutrients available for uptake from the media.
n_ex = 0 # Exchange reaction counter
rxns_EX = [] # List of exchange reactions for knockout
f = open(join(os.getcwd(), "EXflux.txt"), 'w')
for i in range(len(MODEL.reactions)):
    if MODEL.reactions[i].boundary == True:
    #if MODEL.reactions[i].id.startswith("EX_"):
        n_ex += 1
        rxns_EX.append(MODEL.reactions[i].id)
        f.write("(%35s) %8.4f %8.4f\n" % (MODEL.reactions[i].name, sol_1.fluxes[i], sol_2.fluxes[i]))
f.close()

# Performs FVA on the synthetic strain model
if(perform_fva):
    sol_fva2 = flux_variability_analysis(MODEL, rxnList, fraction_of_optimum=precision_fva, loopless=True)
    fva2_max = sol_fva2['maximum']
    fva2_min = sol_fva2['minimum']

    # Prints out FVA data in a column format: name, WT min, WT max, synthetic min, synthetic max fluxes.
    f = open(join(os.getcwd(), "FVA.txt"), 'w')
    for i in range(len(rxnList)):
            f.write("(%35s) %8.4f %8.4f %8.4f %8.4f\n" % (MODEL.reactions.get_by_id(sol_fva1.index[i]).name, fva1_min[i], fva1_max[i], fva2_min[i], fva2_max[i]))
    f.close()

# Saves the model into escher-compatible format.
if(export_to_escher):
    f = open(join(os.getcwd(), "MODEL_fluxes.json"), 'w')
    f.write('{')
    N_RXNS = len(MODEL.reactions)
    for i in range(N_RXNS):
        if(i == N_RXNS - 1):
            f.write('\"%s\": %f}' % (MODEL.reactions[i].name, sol_2.fluxes[i]))  
            break
        f.write('\"%s\": %f, ' % (MODEL.reactions[i].name, sol_2.fluxes[i]))
    f.close()    
    cobra.io.save_json_model(MODEL, "MODEL.json")
    
# Perform reversible reaction knockouts
print('Individual knockout: ')
for reaction in rxns_KO:
    with MODEL as model:
        rxn = model.reactions.get_by_id(reaction)
        rxn.knock_out()
        model.optimize()
        print(' %s blocked (bounds: %s, old/new growth rate %f/%f' % (rxn.name, str(rxn.bounds), sol_2.objective_value, model.objective.value))
print('')

# Performs simultaneous reaction knockouts
with MODEL as model:
    print('Simultaneous knockout:')
    for reaction in simult_KO:
        rxn = model.reactions.get_by_id(reaction)
        rxn.knock_out()
        print(' %s was blocked' % (rxn.name))
    model.optimize()
    print('Old/new growth rate %f/%f\n' % (sol_2.objective_value, model.objective.value))
    
# Perform exchange reaction knockouts
print('Exchange knockout: ')
for ex_reaction in rxns_EX:
    with MODEL as model:
        rxn = model.reactions.get_by_id(ex_reaction)
        rxn.knock_out()
        MODEL.optimize()
        print(' %s blocked (bounds: %s, old/new growth rate %f/%f)' % (rxn.name, str(rxn.bounds), sol_2.objective_value, MODEL.objective.value))  
print('') 
    
MODEL.optimize() # Re-optimize the model after knock-outs