app.py 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181
  1. import psycopg2
  2. from selenium import webdriver
  3. from selenium.webdriver.chrome.service import Service
  4. from selenium.webdriver.common.by import By
  5. from selenium.webdriver.common.keys import Keys
  6. from selenium.webdriver.chrome.options import Options
  7. from selenium.common.exceptions import NoSuchElementException
  8. from selenium.webdriver.support.ui import WebDriverWait
  9. from selenium.webdriver.support import expected_conditions as EC
  10. import os
  11. from flask import Flask, request, jsonify
  12. from waitress import serve
  13. import pandas as pd
  14. from io import StringIO
  15. # Set options for the Chromium browser
  16. chrome_options = Options()
  17. chrome_options.add_argument("--headless") # Optional: Run Chromium in headless mode
  18. chrome_options.add_argument("--no-sandbox")
  19. chrome_options.add_argument("--disable-dev-shm-usage")
  20. # Specify the path to the Chromium driver
  21. service = Service('/usr/bin/chromedriver')
  22. driver = None
  23. def extract(username: str, password: str):
  24. url_credentials = f'https://{username}:{password}@sgu.ulsa.edu.mx/psulsa/alumnos/consultainformacionalumnos/consultainformacion.aspx'
  25. url = 'https://sgu.ulsa.edu.mx/psulsa/alumnos/consultainformacionalumnos/consultainformacion.aspx'
  26. username_integer = int(username[2:])
  27. def insert_alumno_extraccion(datos_html: str, materias_html: str, historial_html: str = 'error', materias_actuales_html: str = 'error'):
  28. try:
  29. conn = psycopg2.connect(
  30. dbname=os.getenv("DBNAME"),
  31. user=os.getenv("DBUSER"),
  32. password=os.getenv("DBPASSWORD"),
  33. host=os.getenv("DBHOST"),
  34. port=os.getenv("DBPORT")
  35. )
  36. cur = conn.cursor()
  37. insert_query = """
  38. INSERT INTO public.alumno_extraccion ("Usuario_claveULSA", datos_html, materias_html, historial_html, materias_actuales_html) VALUES (%s, TRIM(%s), TRIM(%s), TRIM(%s)::JSONB, TRIM(%s))
  39. ON CONFLICT ("Usuario_claveULSA") DO UPDATE SET datos_html = EXCLUDED.datos_html, materias_html = EXCLUDED.materias_html, error_message = NULL, registrado = DEFAULT;
  40. """
  41. cur.execute(insert_query, (username_integer, datos_html, materias_html, historial_html, materias_actuales_html))
  42. conn.commit()
  43. return cur.query.decode('utf-8')
  44. except psycopg2.ProgrammingError as e:
  45. print(f"Error de sintaxis: {e}")
  46. except psycopg2.IntegrityError as e:
  47. print(f"Error de integridad: {e}")
  48. except Exception as e:
  49. print(f"Error: {e}")
  50. finally:
  51. cur.close()
  52. conn.close()
  53. def update_alumno_extraccion_error(error: str):
  54. try:
  55. conn = psycopg2.connect(
  56. dbname=os.getenv("DBNAME"),
  57. user=os.getenv("DBUSER"),
  58. password=os.getenv("DBPASSWORD"),
  59. host=os.getenv("DBHOST"),
  60. port=os.getenv("DBPORT")
  61. )
  62. cur = conn.cursor()
  63. update_query = """
  64. INSERT INTO public.alumno_extraccion ("Usuario_claveULSA", error_message) VALUES (%s, %s)
  65. ON CONFLICT ("Usuario_claveULSA") DO UPDATE SET error_message = EXCLUDED.error_message,
  66. materias_html = DEFAULT, registrado = DEFAULT;
  67. """
  68. cur.execute(update_query, (username_integer, error[:255]))
  69. conn.commit()
  70. print("Data updated successfully")
  71. except psycopg2.ProgrammingError as e:
  72. print(f"Error de sintaxis: {e}")
  73. finally:
  74. cur.close()
  75. conn.close()
  76. try:
  77. driver.get(url_credentials)
  78. driver.get(url)
  79. # si no existe el elemento, ctl00_contenedor_control
  80. datos_html = driver.find_element(By.ID, 'ctl00_contenedor_control').get_attribute('innerHTML')
  81. elemento = WebDriverWait(driver, 3.5).until(
  82. EC.presence_of_element_located((By.ID, 'ctl00_contenedor_HistorialAlumno1_lblBtnSeccionHAcademico'))
  83. )
  84. elemento.click()
  85. # Get the HTML content of the materias element
  86. materias_html = driver.find_element(By.ID, 'ctl00_contenedor_HistorialAlumno1_divHAcademico').get_attribute('innerHTML')
  87. historial_html = driver.find_element(By.ID, 'ctl00_contenedor_HistorialAlumno1_gvMaterias').get_attribute('innerHTML')
  88. # materias_actuales_html = driver.find_element(By.ID, 'ctl00_contenedor_HistorialAlumno1_div13').get_attribute('innerHTML')
  89. historial_html_io = StringIO(f"<table>{historial_html}</table>")
  90. # Read the HTML table into a DataFrame
  91. df = pd.read_html(historial_html_io)[0]
  92. # Convert the DataFrame to JSON
  93. json_result = df[df['GRUPO'] != 'Promedio:'].to_json(orient='records')
  94. # Connect to PostgreSQL database
  95. query = insert_alumno_extraccion(datos_html, materias_html, json_result)
  96. print("Data extracted successfully")
  97. return json_result
  98. except NoSuchElementException as e:
  99. update_alumno_extraccion_error(str(e))
  100. def se_puede_extraer():
  101. try:
  102. conn = conn = psycopg2.connect(
  103. dbname=os.getenv("DBNAME"),
  104. user=os.getenv("DBUSER"),
  105. password=os.getenv("DBPASSWORD"),
  106. host=os.getenv("DBHOST"),
  107. port=os.getenv("DBPORT")
  108. )
  109. cursor = conn.cursor()
  110. # SELECCIONAR ULTIMA planeacion
  111. query = """
  112. SELECT 1
  113. FROM alumno_extraccion_fecha
  114. WHERE CURRENT_DATE BETWEEN fecha_inicio AND fecha_fin
  115. ORDER BY CREATED_AT DESC
  116. LIMIT 1;
  117. """
  118. # Ejecuta la consulta
  119. cursor.execute(query)
  120. result = cursor.fetchone()
  121. # Verifica si se obtuvo algún resultado
  122. exists = result is not None
  123. # Cierra el cursor y la conexión
  124. cursor.close()
  125. conn.close()
  126. return exists
  127. except Exception as e:
  128. print(f"Error: {e}")
  129. return False
  130. app = Flask(__name__)
  131. @app.route('/calificaciones', methods=['POST'])
  132. def main():
  133. global driver
  134. # Initialize the WebDriver
  135. driver = webdriver.Chrome(service=service, options=chrome_options)
  136. username = request.form.get('clave')
  137. password = request.form.get('password')
  138. if se_puede_extraer():
  139. query = extract(username, password)
  140. # Close the session
  141. driver.quit()
  142. return jsonify({"message": "Data extracted successfully"})
  143. if __name__ == '__main__':
  144. serve(app, host='0.0.0.0', port=5000)