Protoball:Export Chron for MLB.py

def export_chron_for_mlb(get_site, get_ftp):
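	"""Export Main Chronology entries as MLB-format XML files, one per
	year range, upload them by FTP, and refresh the wiki index page."""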
	from lxml import etree
	from lxml.etree import Element
	from lxml.etree import ElementTree
	from time import mktime
	from datetime import datetime
	import urllib

	def parse(site, text, title = None):
		# Run wikitext through the MediaWiki parse API, optionally in the
		# context of a title, and return the 'parse' payload.
		kwargs = {}
		if title is not None: kwargs['title'] = title
		result = site.api('parse', text = text, **kwargs)
		return result['parse']

	def CDATA(text):
		return etree.CDATA(text)

	def elem(tag, text=None, attrib=None, nsmap=None, **extra):
		# Build an element; extra keyword arguments become XML attributes.
		el = etree.Element(tag, attrib, nsmap=nsmap, **extra)
		if text:
			el.text = text
		return el

	def field(key, text, primitive="String"):
		# Build a <field> element whose text is wrapped in a CDATA section.
		return elem("field", CDATA(text), primitive=primitive, key=key)

	def itemTag(tag):
		# Map the tag to its canonical value and emit an <itemTag> element.
		tag = tag_value(tag)
		return elem("itemTag", type="category", value=tag.lower(), displayName=tag)

	# Substitutions that collapse multi-word tag names into single tokens.
	sub = {
		'Western New York': 'WNY',
		'Pre-Knicks': 'PreKnicks',
		'New Jersey': 'NewJersey',
		'New England': 'NewEngland',
		'Illinois': 'IllinoisMissouri',
		'Bat-Ball': 'BatBall',
		'Town Ball': 'TownBall',
		'African Americans': 'AfricanAmericans',
		'Base Ball': 'BaseBall',
	}

	def tag_value(tag):
		# Fall back to the tag itself when no substitution is defined.
		return sub.get(tag, tag)

	# Year buckets; each one becomes its own exported XML file.
	date_ranges = ["1000-1500", "1501-1700", "1701-1800", "1801-1825", "1826-1870"]

	def transfer_files(ftp):
		# Upload each generated XML file into protoball.org/mlb on the FTP host.
		ftp.cwd("protoball.org/mlb")
		for date_range in date_ranges:
			with open("/home/dave/Desktop/" + date_range + ".xml", "rb") as f:
				ftp.storbinary("STOR " + date_range + ".xml", f)
		ftp.close()

	def update_files_page(site):
		# Rewrite the wiki page that lists download links for the exports.
		page = site.Pages["Protoball:Extracts_for_MLB"]
		update_date = datetime.now().strftime("%A, %d. %B %Y %I:%M%p")
		date_range_links = "\n".join(["* http://protoball.org/mlb/{0}.xml".format(date_range) for date_range in date_ranges])
		page_text = """
Below are links to chronology entries in mlb xml format.

Last updated on {0}

== Files ==

{1}
""".format(update_date, date_range_links)
		page.save(page_text, "Post Time Update")

	def gen_xml(site):
		# Build one <list> document per year bucket and write it to disk.
		for date_range in date_ranges:
			root = elem("list")
			root.append(elem("maxsize", "1000"))

			from_date, to_date = date_range.split("-")
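			# Semantic MediaWiki #ask query: list every Main Chronology entry
			# whose Year falls inside this bucket, rendered on a single line.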
			ask = "{{{{#ask: [[Concept:Main Chronology]][[Year::> {0}]][[Year::< {1}]]|link=none|sort=Year|limit=10000}}}}".format(from_date, to_date)
			if from_date == "1000":
				ask = ask.replace("[[Year::> 1000]]", "")
			result = parse(site, ask)['text']['*']
			# The rendered result looks like "<p>title, title, ...\n</p>";
			# strip the HTML wrapper and split out the page titles.
			titles = result[3:result.find("\n</p>")].split(", ")

			for title in titles:
				page = site.Pages[title]

				# Fetch the raw wikitext, strip the {{Chronology Entry ... }} wrapper,
				# then split the template body into key=value fields.
				text = page.edit()
				text = text[len("{{Chronology Entry\n|"):-3]
				page_info = {}
				for info in text.split("\n|"):
					if "=" not in info:
						print title + " missing data"
						continue
					key, data = info.split("=", 1)
					page_info[key] = data
			
				headline = page_info["Headline"]

				# page.revisions() yields newest first, so the first revision seen
				# is the latest save and the last one seen is the page's creation.
				rev_count = 0
				first_rev = last_rev = None
				for rev in page.revisions():
					rev_count += 1
					if not last_rev:
						last_rev = rev
					first_rev = rev

				item = elem("item")
				item.set("name", headline)
				item.set("state", "A")
				item.set("namespaceId", "1")
				item.set("version", str(rev_count))
				item.set("id", str(page._info["pageid"]))
				created_on = datetime.fromtimestamp(mktime(first_rev['timestamp'])).isoformat()
				item.set("createdOn", created_on + "-0500")
				last_save = datetime.fromtimestamp(mktime(last_rev['timestamp'])).isoformat()
				item.set("lastSave", last_save + "-0500")
				# Entries dated 1000 or earlier get a sentinel userDate.
				user_date = "1000-01-01T00:01:00"
				if int(page_info["Year"]) > 1000:
					user_date = datetime(int(page_info["Year"]), 1, 1).isoformat()
				item.set("userDate", user_date + "-0500")
			
				item.append(elem("displayData", "name={0}".format(urllib.quote(headline.encode("utf-8")))))

				item.append(elem("lockInfo"))

				item.append(elem("itemType", key="chron", name="Chron", category="content"))
			
				appAccountIds = elem("appAccountIds")
				appAccountIds.append(elem("appId", "14"))
				item.append(appAccountIds)
			
				# Notation combines the Year Suffix field with the ".N" part of the title.
				notation = page_info.get("Year Suffix", "") + title[title.find('.'):]
				item.append(field("notation", notation))
				item.append(field("year", page_info["Year"]))
				item.append(field("displayYear", title))
				item.append(field("title", headline))
				
				if "Text" not in page_info:
					print title + " has no text"
					continue
				text = page_info["Text"]
				# Fold any annotation fields into the description body.
				for note in ['Warning', 'Comment', 'Query']:
					if note in page_info:
						text += "\n" + page_info[note]
				item.append(field("description", text, "Clob"))

				if "Tags" in page_info:
					for tag in page_info["Tags"].split(","):
						item.append(itemTag(tag.strip()))
				if "Game" in page_info:
					item.append(itemTag(page_info["Game"]))
				if "Location" in page_info:
					item.append(itemTag(page_info["Location"]))
				root.append(item)
			tree = ElementTree(root)
			# Write this bucket's tree out to its own XML file.
			with open("/home/dave/Desktop/" + date_range + ".xml", "w") as f:
				tree.write(f, encoding="UTF-8", pretty_print=True, xml_declaration=True, standalone=True)

	# Generate the XML locally, upload it, then refresh the index page.
	site = get_site()
	gen_xml(site)
	ftp = get_ftp()
	transfer_files(ftp)
	update_files_page(site)
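
The function expects two zero-argument factories. Below is a minimal sketch of how it might be wired up, assuming mwclient for the wiki API (which matches the site.Pages / site.api / page.revisions() calls above) and ftplib for the upload; the host, path, and credentials are placeholders, not values from the original script.

import mwclient
from ftplib import FTP

def get_site():
	# Hypothetical wiki connection; host, path, and login are placeholders.
	site = mwclient.Site("protoball.org", path="/")
	site.login("BotUser", "botpassword")
	return site

def get_ftp():
	# Hypothetical FTP connection; host and login are placeholders.
	ftp = FTP("ftp.example.net")
	ftp.login("ftpuser", "ftppassword")
	return ftp

export_chron_for_mlb(get_site, get_ftp)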