RWET: Final Documentation

For my final, I made a program through which I can write poems about other people.  Right now it’s still in a few different parts that I need to fit together, but eventually, you could download it and run it, enter some keywords and your gmail info, and it will generate a poem about you, written by me, in a PDF, that can be printed onto a piece of paper, folded up and turned into a little book, like this:

This happens in a few stages.  First, the program reads through all of the user’s saved gchats—one limitation is that you need a fairly large log of gchats for this to work—and uses regular expressions to keep only the lines written by the user.  Right now, this process happens through three functions, which are based on Doug Hellmann’s imaplib examples.

The first connects with Gmail:

#imaplib_connect.py

import imaplib
import ConfigParser
import os

def open_connection(u, p, verbose=False):
    """Open an SSL IMAP connection to Gmail and log in.

    Args:
        u: Username (full Gmail address) to log in with.
        p: Password for that account.
        verbose: When true, print progress messages to stdout.

    Returns:
        The logged-in ``imaplib.IMAP4_SSL`` connection.  The caller is
        responsible for calling ``logout()`` on it.

    Raises:
        imaplib.IMAP4.error: If the server rejects the login.
    """
    # Connect to the server
    hostname = 'imap.gmail.com'
    if verbose:
        print('Connecting to %s' % hostname)
    connection = imaplib.IMAP4_SSL(hostname, 993)

    # Login to our account
    if verbose:
        print('Logging in as %s' % u)
    try:
        connection.login(u, p)
    except imaplib.IMAP4.error:
        # Do not leak the open socket when authentication fails.
        connection.shutdown()
        raise
    return connection

if __name__ == '__main__':
    # Manual smoke test.  open_connection() requires a username and a
    # password, so ask for them instead of calling it without arguments.
    import getpass

    c = open_connection(raw_input('Gmail username: '),
                        getpass.getpass('Gmail password: '),
                        verbose=True)
    try:
        print(c)
    finally:
        c.logout()

The second function downloads each gchat and returns the html representation.

# fetch.py

import imaplib
import email
import imaplib_connect

def get_msg(num, username, password):
	"""Fetch one message from the '[Gmail]/Chats' mailbox as HTML.

	A new IMAP connection is opened (and closed again) on every call.

	Args:
		num: Sequence number of the message to fetch (1-based).
		username: Gmail address to log in with.
		password: Password for that account.

	Returns:
		The decoded ``text/html`` payload of the message; ``0`` when the
		server returns no data for *num* (callers use this as the
		end-of-mailbox sentinel); ``None`` when the message has no
		``text/html`` part.
	"""
	c = imaplib_connect.open_connection(username, password)
	try:
		c.select('[Gmail]/Chats', readonly=True)
		typ, msg_data = c.fetch(str(num), '(RFC822)')
		if not msg_data or msg_data[0] is None:
			return 0

		for response_part in msg_data:
			if not isinstance(response_part, tuple):
				continue
			msg = email.message_from_string(response_part[1])
			for part in msg.walk():
				if part.get_content_type() == 'text/html':
					return part.get_payload(None, True)
		return None
	finally:
		# close() is only legal once a mailbox is selected; ignore that
		# failure, but always log out so the connection is released.
		try:
			c.close()
		except imaplib.IMAP4.error:
			pass
		c.logout()

if __name__ == '__main__':
	# Command-line use: the first argument is the message number to fetch.
	import sys

	message_number = int(sys.argv[1])
	m = get_msg(message_number, 'username', 'password')
	print(m)

This function reads through the chat and saves the lines written by the user.

# gchats 4.23.2012

import requests
from bs4 import BeautifulSoup
import fetch
import re
import getpass

def get_chats():
	"""Download the user's saved Gmail chats and keep only their own lines.

	Prompts for a Gmail username (``@gmail.com`` is appended when missing)
	and password, then fetches chat messages one by one, starting at
	message 1, until ``fetch.get_msg`` returns ``0``.

	Returns:
		A list of strings: every chat line that starts with ``me:`` (prefix
		removed) plus the unlabelled lines that follow it, up to the next
		line that starts with another speaker's ``name:`` label.
	"""
	import sys

	u = raw_input("Please enter Gmail username: ")
	if re.search(r'.+@gmail[.]com', u):
		print("Thanks!")
	else:
		u += "@gmail.com"
	p = getpass.getpass("Please enter password: ")
	print("Thanks!")

	# Timestamps such as "10:42 PM"; AM and one-digit hours are stripped too.
	timestamp = re.compile(r'\d{1,2}:\d\d\s[AP]M')
	other_speaker = re.compile(r'\w+:')
	own_prefix = 'me:'

	txt = []
	# True while the most recent speaker label seen was the user's own.
	# Deliberately not reset between messages.
	speaking = False

	counter = 1
	while True:
		# Progress indicator: message numbers on one line.
		sys.stdout.write('%d ' % counter)
		sys.stdout.flush()
		m = fetch.get_msg(counter, u, p)
		if m == 0:
			print(0)
			break
		counter += 1
		soup = BeautifulSoup(str(m))
		for div in soup.find_all('div'):
			line = timestamp.sub('', div.text).strip()
			if line.startswith(own_prefix):
				speaking = True
				# Drop only the leading label, not "me:" inside the text
				# (e.g. in "time:").
				txt.append(line[len(own_prefix):].strip())
			elif other_speaker.match(line):
				speaking = False
			elif speaking:
				txt.append(line)

	return txt

if __name__ == '__main__':
	# Dump the user's own chat lines to a text file, one line per entry.
	import io

	chat = get_chats()

	filename = "spike/gchats.txt"
	# UTF-8 so non-ASCII chat text can be written; the with-block closes
	# the file even if a write fails.
	with io.open(filename, 'w', encoding='utf-8') as out:
		for line in chat:
			out.write(line + u'\n')

After I have a full transcript of the user’s chats saved, I then run it through a few processes that generate a poem in the poetic style that I developed for my midterm, the alliteration machine.  I’m still tweaking this to get the best results, but currently, it first runs an algorithm that takes a key word or series of key words given by the user, such as “cat apple death” and creates a long poem from gchat lines by searching for the key word, finding a line, taking a two word phrase from that line and then searching again through a shuffled version of the corpus of gchats, and continues, increasing the length of the phrase until it cannot find a new line, at which point it takes the last word from the last line and repeats the process.  It repeats until it can no longer find new lines, which occurs when it finds a word that is only used once, often misspelled words, proper nouns, or other words that might be unusual for whatever reason.

Here is the first process, which generates a long and often repetitive poem from the raw gchats.

# this is supposed to do everything

class PmMkr(object):
	"""Builds a poem by chaining chat lines that share a word or phrase."""

	def __init__(self):
		# Lines collected so far, each terminated with a newline.
		self.poem = []

	def print_poem(self):
		"""Print the list of collected poem lines."""
		print(self.poem)

	def get_poem(self):
		"""Return the list of collected poem lines."""
		return self.poem

	def make_poem(self, key, chat, check):
		"""Append lines from *chat* that continue from *key* to the poem.

		The corpus is shuffled and scanned for lines containing *key* at a
		word boundary.  A matching line other than *check* is added to the
		poem; if *key* is followed by another word in that line, the method
		recurses with the longer phrase (punctuation and digits removed).
		When the scan ends without being able to extend the phrase, it
		recurses once more using the last word of *check* as the new key.

		Args:
			key: Word or phrase to search for (matched literally).
			chat: Iterable of corpus lines.  It is read once up front, so a
				file object or generator works as well as a list.
			check: The line that produced *key*; it is never re-added.

		Returns:
			``1`` when the chain cannot be continued from here, otherwise
			``None``.  Results accumulate in ``self.poem``.

		Note:
			The method is recursive, so long chains are bounded by the
			interpreter's recursion limit.
		"""
		import re
		import random

		punc = re.compile(r'[-.?!,\'":;()|0-9]')

		# Materialise the corpus once and hand the list to recursive calls.
		lines = [line.strip() for line in chat]
		rchat = list(lines)
		random.shuffle(rchat)

		# Escape the key so characters like "(" or "[" in chat text cannot
		# break or alter the patterns.
		escaped = re.escape(key)
		starts_with_key = re.compile(r'\b' + escaped)
		key_plus_word = re.compile(escaped + r'\W*\s\S+\b')

		# True after a match that could not extend the phrase.
		stalled = False
		new_line = ""

		for line in rchat:
			if not starts_with_key.search(line):
				continue
			new_line = line
			if line == check:
				stalled = True
				continue
			if not stalled:
				self.poem.append(line + '\n')
			phrase = key_plus_word.search(line)
			if phrase is None:
				stalled = True
				continue
			if stalled:
				self.poem.append(line + '\n')
			stalled = False
			self.make_poem(punc.sub("", phrase.group(0)), lines, line)

		if not stalled:
			return None

		# Fall back to the last word of the line we came from.
		nk = punc.sub("", check.strip().split(" ")[-1])
		if nk != key and nk != "" and new_line != "":
			self.make_poem(nk, lines, new_line)
			return None

		print("SHOULD BE OVER")
		return 1

if __name__ == '__main__':
	# Generate one poem file from a cached chat transcript (or from Gmail
	# when no transcript exists) and the keywords typed by the user.
	import sys
	import get_chats
	import amachine
	import re

	sys.setrecursionlimit(1000)

	try:
		# Read the corpus into a list: make_poem() scans it many times and
		# the file should not stay open.
		with open('annchats.txt', 'r') as chat_file:
			chat = chat_file.readlines()
	except IOError:
		# Announce the slow download before it starts.
		print('Getting chats.  This may take a while.')
		chat = get_chats.get_chats()

	key_phrase = raw_input("Please enter keywords, I suggest 3 or more: ")
	key_phrase = re.sub(r'[,.-]', '', key_phrase)
	# split() without an argument drops the empty keywords that repeated
	# spaces would otherwise produce.
	key_words = key_phrase.split()

	filename = key_phrase + ".txt"
	poemmaker = PmMkr()
	with open(filename, 'w') as out:
		for key in key_words:
			# The same PmMkr is reused, so lines accumulate across keywords.
			poemmaker.make_poem(key, chat, "")
			poem = amachine.amachine(poemmaker.get_poem(), key)
			out.writelines(poem)

Then I run the results through the alliteration machine, which uses a Markov chain at the end to mix the results up a bit. The result is a poem varying in length usually from 20 to 80 lines or so.  I have experimented with switching the order of the Markov chain and alliteration machine, in order to keep the alliteration intact, but so far I prefer the results I get from using the Markov chain as the last process.

# hw-3 markov chain with a-machine

import sys
args = sys.argv
import re
from markov import MarkovGenerator

def amachine(txt, name):
	"""Filter words from *txt* by a letter-matching rule, then remix them.

	txt  -- iterable of text lines (strings).
	name -- keyword; its lowercase vowels (a, e, i, o, u) are removed and the
	        remaining characters, together with "lnkvt", form the set of
	        letters the filter reacts to.

	Words that pass the filter are joined with spaces, fed to a
	MarkovGenerator(n=1, max=500), and whatever generate() returns is
	returned to the caller.
	NOTE(review): MarkovGenerator comes from the external `markov` module;
	the type of its result is not visible here -- confirm.
	"""

	txt_lines = txt
	#print type(txt_lines)
#	print txt_lines
	name = re.sub(r'[aeiou]', '', name)

	key = ""          # key letter of the most recently accepted word
	tempKey = ""      # candidate key letter while scanning the current word
	firstWord = 0     # becomes 1 once the first word has been processed
	keySet = 0        # 1 while a candidate key letter is waiting for its match
	tempKeySet = 0    # value of keySet before the current word, for rollback
	wordSet = 0       # 1 when the current word is a candidate for acceptance
	notWord = 0       # 1 when the current word has been rejected
	lastWord = ""     # last accepted word; an immediate repeat is skipped

	thePoem = ""

	# break through lines
	for line in txt_lines:
		#print line
		# NOTE(review): the result of strip() is discarded, so `line` is
		# unchanged and any trailing newline stays attached to the last word.
		# Presumably an oversight -- confirm before changing, since the
		# output currently keeps those newlines.
		line.strip()
		#split words
		words = line.split(" ")
		#go through words
		#print words
		for word in words:

		#	if word == lastWord:
			if word != lastWord:
				#go through letters
				tempKeySet = keySet
				for letter in word:
					# only letters in this set take part in the matching
					r = name + "lnkvt"
					if re.search(r'[%s]' % r, letter.lower()):				
						if notWord == 0:
							if keySet == 0:		
							# no candidate pending: this letter becomes it
								tempKey = letter.lower()
								keySet = 1
								wordSet = 1
							else:
								if letter.lower() == tempKey:
									# matches the pending candidate letter
									keySet = 0
									wordSet = 1
								else: 
									# different tracked letter: reject the
									# word and restore the pre-word state
									wordSet = 0
									notWord = 1
									tempKey = key
									keySet = tempKeySet							

				if wordSet == 1 and notWord == 0:
					# word accepted: commit the key letter and keep the word
					#print word
					key = tempKey
					#print key
					wordSet = 0
					thePoem += " " + word
					lastWord = word
					if firstWord == 0:
						firstWord = 1
				else:
					# word rejected or untouched: clear the per-word flags
					notWord = 0
					wordSet = 0
					if firstWord == 0:
						keySet = 0
						firstWord = 1

	#print "##" + thePoem 

	#that was the midterm
	#here's some markov

	generator = MarkovGenerator(n=1, max=500)

	line = thePoem.strip()
	generator.feed(line)

	genpoem = generator.generate()
	#print genpoem
	return genpoem

A lot of this code will probably change as I work toward creating a program that brings everything together, and I’ll try to replace it as I do that. Finally, once I’ve generated the poem, I run it through another program that I wrote in Processing, which formats the poem into a two page grid which can be folded into a book, and saves it as a PDF which can be printed onto one sheet of paper.

Here is the Processing code, which is also being tweaked.

import processing.pdf.*;


// Canvas size; presumably a US Letter page at 72 units per inch -- confirm.
int w = int(72*8.5);
int h = 72*11;

// NOTE(review): s, start and firstline are not used in the code shown here.
boolean s = false;
boolean start = false;
boolean firstline = true;

// PDF recorder created in setup(); draw() calls nextPage() on it.
PGraphicsPDF pdf;

int page = 0;    // which page draw() renders next (0, then 1)
int lh = 16;     // vertical step applied per counted text row
float butt = 0;  // text rows already used in the current grid cell
int q = 0;       // current grid cell: column is q%4, row is floor(q/4)
float kx;        // x position of the text box being drawn
float ky;        // y position of the text box being drawn
float prevky;    // NOTE(review): not used in the code shown here

int nk = 0;      // index of the last line handled on page 0

int xmargin = 50;
int ymargin = 60;

// Size of one text box: a quarter of the page minus the margin.
int boxw = w/4 - xmargin;
int boxh = h/4 - ymargin;

// Vertical budget per cell, computed in setup() and again in draw().
int space;

// Lines of the poem, loaded in setup().
String[] txtlines;

int frame = 0;   // NOTE(review): not used in the code shown here

// Opens the window and the PDF recorder, loads the poem, and estimates the
// vertical space budget per grid cell used by draw() for the first page.
void setup() {
  size(w, h);
  pdf = (PGraphicsPDF)beginRecord(PDF, "pdf.pdf");
  txtlines = loadStrings("what-hair-sun-girl.txt");
  println(txtlines.length);

  // Add up, for every line, its text width divided by the box width.
  // NOTE(review): textWidth() is called before draw() sets textSize(10),
  // so it presumably measures with the default text size -- confirm.
  for (int j = 0; j < txtlines.length; j++) {
    space += ceil(textWidth(txtlines[j])) / boxw;
  }
  // Convert the row count to a height and spread it over 32 cells.
  space = space * lh;
  println (space);
  space = space/32;
  println (space);
  println(boxh);
  // Never let the budget exceed the box height.
  if (space > boxh) {
    space = boxh - 10;
  }
  println(space);

  //noLoop();
  frameRate(1);
  // NOTE(review): recording was already started by the beginRecord() call
  // at the top of setup(); confirm this second call is needed.
  beginRecord(pdf);
}


// Renders the poem into a 4-column grid over two PDF pages: page 0 on the
// first call, page 1 on the second, after which recording and looping stop.
// Rows 1 and 3 of the grid are drawn rotated by PI.
void draw() {  
  background(255);

//  stroke(0);
//  line(w/4, 0, w/4, h);
//  line(w/2, 0, w/2, h);
//  line(3*w/4, 0, 3*w/4, h);
//
//  line(0, h/4, w, h/4);
//  line(0, h/2, w, h/2);
//  line(0, 3*h/4, w, 3*h/4);


  fill(0);
  textSize(10);

  if (page == 0) {
    // First page: place lines cell by cell while q stays below 15.
    for (int k = 0; k < txtlines.length; k++) {
      //print(q);
      if (q < 15) {
        // Remember how far we got; page 1 starts from this index.
        nk = k;
        int nb = ceil(textWidth(txtlines[k])) / boxw;

        // Move to the next cell when this line would exceed the budget.
        if ((5+(butt+nb)*lh) > space) {

          q++;
          butt = 0;
        }

        if (q == 1) {
          ///THE TITLE
          kx = 20 + 5 + q%4*(w/4);
          ky = 80 + floor(q/4)*h/4 + butt*lh;
          text("what hair sun girl", kx, ky, boxw, boxh);
          text("by owen ribbit", kx, ky + 14, boxw, boxh);
          // Cell 1 holds only the title: retry the same line in the next cell.
          k-=1;
          q++;
        } 
        else {

          if (floor(q/4) == 1 || floor(q/4) == 3) {
            // Rows 1 and 3: draw rotated by PI around the page.
            kx = xmargin/2 + q%4*(w/4);
            ky = ymargin/2 + floor(q/4)*h/4 + butt*lh - h/4;
            pushMatrix();
            translate(w, h);
            rotate(PI);
            fill(255, 0, 0);
            //text(k, kx, ky);
            fill(0);
            text(txtlines[k], kx, ky, boxw, boxh);
            // Advance by the number of box-width rows this line occupies.
            butt += ceil(textWidth(txtlines[k])/ (boxw));
            popMatrix();
          } 
          else {
            // Rows 0 and 2: draw upright.
            kx = xmargin/2 + q%4*(w/4);
            ky = ymargin/2 + floor(q/4)*h/4 + butt*lh;
            fill(255, 0, 0);
           // text(k, kx, ky);
            fill(0);
            text(txtlines[k], kx, ky, boxw, boxh);
            butt += ceil(textWidth(txtlines[k])/ (boxw));
          }
        }
      }
    } 
    pdf.nextPage();
    page++;
  }
  else if (page == 1) {

    // Recompute the per-cell budget for the remaining lines (16 cells).
    // NOTE(review): space is not reset first, so the value left over from
    // setup() is included in this sum -- confirm that is intended.
    for (int j = nk; j < txtlines.length; j++) {
      space += ceil(textWidth(txtlines[j])) / boxw;
    }
    space = space * lh;
    space = space/16;
    println(space);
    if (space > boxh) {
      space = boxh - 10;
    }
    println(space);


    // Start again at the first cell of the new page.
    butt = 0;
    q = 0;
    //println(nk);
    for (int k = nk; k < txtlines.length; k++) {

      //print(q);
      //print(5+butt*lh);
      //print(" ");
      //println(space);
      int nb = ceil(textWidth(txtlines[k])) / boxw;
      // Move to the next cell when this line would exceed the budget.
      if ((5+(butt+nb)*lh) > space) {
        q++;
        butt = 0;
      }

      if (floor(q/4) == 1 || floor(q/4) == 3) {
        // Rows 1 and 3: draw rotated by PI around the page.
        kx = xmargin/2 + q%4*(w/4);
        ky = ymargin/2 + floor(q/4)*h/4 + butt*lh - h/4;
        pushMatrix();
        translate(w, h);
        rotate(PI);
        fill(255, 0, 0);
       // text(k + " " + butt, kx, ky);
        fill(0);
        text(txtlines[k], kx, ky, boxw, boxh);
       
        butt += ceil(textWidth(txtlines[k])/ (boxw));
        // Once four or more rows are used, add one extra row of spacing.
        if (butt >= 4) {
          butt++;
        }
        println(butt);
        popMatrix();
      } 
      else {
        // Rows 0 and 2: draw upright.
        kx = xmargin/2 + q%4*(w/4);
        ky = ymargin/2 + floor(q/4)*h/4 + butt*lh;
        fill(255, 0, 0);
        //text(k + " " + butt, kx, ky);
        fill(0);
        println(k + ": " + butt + " " + ky);
        text(txtlines[k], kx , ky, boxw, boxh);
        //println(k + ": " + textWidth(txtlines[k])/ (boxw) + txtlines  [k]);
        butt += ceil(textWidth(txtlines[k])/ (boxw));
        // Once four or more rows are used, add one extra row of spacing.
        if (butt >= 4) {
          butt++;
        }
      }
    } 

    // Second page done: finish the PDF and stop calling draw().
    endRecord();
    noLoop();
  }
}

So, I’m looking into different ways to put everything together as one program that can be downloaded and run, so I can begin becoming a hugely prolific poet. Hopefully I’ll update with that progress soon.

Advertisements

Author: owen ribbit

poop

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s