# Protoball:Export Chron for MLB.py
# (The "Jump to navigation" / "Jump to search" lines were wiki page chrome, not part of the script.)
def export_chron_for_mlb(get_site, get_ftp):
    """Export the Main Chronology as per-date-range XML files and publish them.

    The export runs in three steps:

    1. ``gen_xml`` queries the wiki for every page in
       ``Concept:Main Chronology`` within each date range, converts each page
       into an ``<item>`` element and writes one ``<range>.xml`` file per
       range into the local output directory.
    2. ``transfer_files`` uploads those files to ``protoball.org/mlb`` over
       the FTP connection.
    3. ``update_files_page`` rewrites the ``Protoball:Extracts_for_MLB`` wiki
       page with links to the uploaded files and the time of the update.

    Args:
        get_site: Zero-argument callable returning the wiki site object. The
            code uses ``site.api(...)``, ``site.Pages[title]`` and, on pages,
            ``edit()``, ``revisions()``, ``save()`` and ``_info`` (this looks
            like the mwclient API -- TODO confirm the version in use).
        get_ftp: Zero-argument callable returning an FTP connection exposing
            ``cwd``, ``storbinary`` and ``close`` (``ftplib.FTP``-like).

    Raises:
        KeyError: If a chronology page lacks a ``Headline`` or ``Year`` field.
    """
    from lxml import etree
    from lxml.etree import ElementTree
    from time import mktime
    from datetime import datetime
    try:
        from urllib import quote  # Python 2
    except ImportError:
        from urllib.parse import quote  # Python 3

    # Directory the XML files are written to and later uploaded from.
    output_dir = "/home/dave/Desktop/"
    # Fixed offset appended to every timestamp written into the XML.
    # NOTE(review): timestamps are converted with mktime()/fromtimestamp(),
    # i.e. via the local timezone of the machine -- confirm it matches -0500.
    utc_offset = "-0500"

    def parse(site, text, title=None):
        """Run the wiki ``parse`` API on *text* and return its ``parse`` result."""
        kwargs = {}
        # Only send a title when the caller supplied one (the original test
        # was inverted and sent title=None instead).
        if title is not None:
            kwargs['title'] = title
        result = site.api('parse', text=text, **kwargs)
        return result['parse']

    def CDATA(text):
        """Wrap *text* so lxml serialises it as a CDATA section."""
        return etree.CDATA(text)

    def elem(tag, text=None, attrib=None, nsmap=None, **extra):
        """Create an element named *tag* with optional text and attributes."""
        el = etree.Element(tag, attrib, nsmap, **extra)
        if text:
            el.text = text
        return el

    def field(key, text, primitive="String"):
        """Build a ``<field>`` element holding *text* as CDATA."""
        return elem("field", CDATA(text), primitive=primitive, key=key)

    # Wiki tag names that are exported under a different identifier.
    sub = {
        'Western New York': 'WNY',
        'Pre-Knicks': 'PreKnicks',
        'New Jersey': 'NewJersey',
        'New England': 'NewEngland',
        'Illinois': 'IllinoisMissouri',
        'Bat-Ball': 'BatBall',
        'Town Ball': 'TownBall',
        'African Americans': 'AfricanAmericans',
        'Base Ball': 'BaseBall',
    }

    def tag_value(tag):
        """Return the export identifier for *tag* (the tag itself if unmapped)."""
        return sub.get(tag, tag)

    def itemTag(tag):
        """Build an ``<itemTag>`` category element for *tag*."""
        # Strip first: values split out of a comma-separated list carry
        # leading spaces that would otherwise defeat the ``sub`` lookup.
        tag = tag_value(tag.strip())
        return elem("itemTag", type="category", value=tag.lower(),
                    displayName=tag)

    date_ranges = ["1000-1500", "1501-1700", "1701-1800", "1801-1825",
                   "1826-1870"]

    def transfer_files(ftp):
        """Upload every generated XML file, then close the FTP connection."""
        try:
            ftp.cwd("protoball.org/mlb")
            for date_range in date_ranges:
                file_name = date_range + ".xml"
                with open(output_dir + file_name, "rb") as f:
                    ftp.storbinary("STOR " + file_name, f)
        finally:
            # Close the connection even when an upload fails.
            ftp.close()

    def update_files_page(site):
        """Rewrite the wiki page that links to the uploaded XML files."""
        page = site.Pages["Protoball:Extracts_for_MLB"]
        update_date = datetime.now().strftime("%A, %d. %B %Y %I:%M%p")
        date_range_links = "\n".join(
            "* http://protoball.org/mlb/{0}.xml".format(date_range)
            for date_range in date_ranges)
        # The heading and the bullet list must start at the beginning of a
        # line to be recognised as wiki markup.
        page_text = """
Below are links to chronology entries in mlb xml format.

Last updated on {0}

== Files ==

{1}
""".format(update_date, date_range_links)
        page.save(page_text, "Post Time Update")

    def parse_entry(title, wikitext):
        """Split a ``{{Chronology Entry ...}}`` template into a key/value dict."""
        # Assumes the page text is exactly one template call that starts with
        # "{{Chronology Entry\n|" and ends with three trailing characters
        # (presumably "\n}}") -- TODO confirm against real pages.
        body = wikitext[len("{{Chronology Entry\n|"):-3]
        page_info = {}
        for info in body.split("\n|"):
            if "=" not in info:
                print(title + " missing data")
                continue
            key, data = info.split("=", 1)
            page_info[key] = data
        return page_info

    def build_item(site, title):
        """Return the ``<item>`` element for page *title*, or None if it has no text."""
        page = site.Pages[title]
        page_info = parse_entry(title, page.edit())
        headline = page_info["Headline"]

        # Walk the history once to get the revision count and both ends of it.
        # The first revision yielded is stored as the latest one, the last
        # yielded as the creation revision (assumes newest-first iteration).
        rev_count = 0
        first_rev = last_rev = None
        for rev in page.revisions():
            rev_count += 1
            if last_rev is None:
                last_rev = rev
            first_rev = rev

        item = elem("item")
        item.set("name", headline)
        item.set("state", "A")
        item.set("namespaceId", "1")
        item.set("version", str(rev_count))
        item.set("id", str(page._info["pageid"]))
        created_on = datetime.fromtimestamp(
            mktime(first_rev['timestamp'])).isoformat()
        item.set("createdOn", created_on + utc_offset)
        last_save = datetime.fromtimestamp(
            mktime(last_rev['timestamp'])).isoformat()
        item.set("lastSave", last_save + utc_offset)

        # Years up to 1000 are all pinned to the same placeholder date.
        user_date = "1000-01-01T00:01:00"
        if int(page_info["Year"]) > 1000:
            user_date = datetime(int(page_info["Year"]), 1, 1).isoformat()
        item.set("userDate", user_date + utc_offset)

        item.append(elem("displayData", "name={0}".format(
            quote(headline.encode("utf-8")))))
        item.append(elem("lockInfo"))
        item.append(elem("itemType", key="chron", name="Chron",
                         category="content"))
        appAccountIds = elem("appAccountIds")
        appAccountIds.append(elem("appId", "14"))
        item.append(appAccountIds)

        # Notation is the year suffix followed by everything from the first
        # '.' of the title; a title without a dot contributes nothing.
        _, dot, after_dot = title.partition('.')
        notation = page_info.get("Year Suffix", "") + dot + after_dot
        item.append(field("notation", notation))
        item.append(field("year", page_info["Year"]))
        item.append(field("displayYear", title))
        item.append(field("title", headline))

        if "Text" not in page_info:
            print(title + " has no text")
            return None
        text = page_info["Text"]
        for note in ['Warning', 'Comment', 'Query']:
            if note in page_info:
                text += "\n" + page_info[note]
        item.append(field("description", text, "Clob"))

        if "Tags" in page_info:
            for tag in page_info["Tags"].split(","):
                if tag.strip():
                    item.append(itemTag(tag))
        if "Game" in page_info:
            item.append(itemTag(page_info["Game"]))
        if "Location" in page_info:
            item.append(itemTag(page_info["Location"]))
        return item

    def gen_xml(site):
        """Write one XML file per date range into the output directory."""
        for date_range in date_ranges:
            root = elem("list")
            root.append(elem("maxsize", "1000"))
            from_date, to_date = date_range.split("-")
            ask = ("{{{{#ask: [[Concept:Main Chronology]][[Year::> {0}]]"
                   "[[Year::< {1}]]|link=none|sort=Year|limit=10000}}}}"
                   ).format(from_date, to_date)
            # The first range has no lower bound.
            if from_date == "1000":
                ask = ask.replace("[[Year::> 1000]]", "")
            result = parse(site, ask)['text']['*']
            # The rendered query is "<p>Title, Title, ...\n</p>": drop the
            # leading "<p>" and cut at the closing tag.
            titles = result[3:result.find("\n</p>")].split(", ")
            for title in titles:
                if not title:
                    # An empty query result yields a single blank title.
                    continue
                item = build_item(site, title)
                if item is not None:
                    root.append(item)
            tree = ElementTree(root)
            # Binary mode: lxml writes encoded bytes. Closing the file here
            # guarantees the data is on disk before it is uploaded.
            with open(output_dir + date_range + ".xml", "wb") as f:
                tree.write(f, encoding="UTF-8", pretty_print=True,
                           xml_declaration=True, standalone=True)

    site = get_site()
    gen_xml(site)
    ftp = get_ftp()
    transfer_files(ftp)
    update_files_page(site)